Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-15 14:08:00 +00:00)

Commit f62e6cb063: Merge branch 'main' into make-kvstore-optional

554 changed files with 63,962 additions and 4,870 deletions
.github/actions/run-and-record-tests/action.yml (new file, 82 lines)

@@ -0,0 +1,82 @@
name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'

inputs:
  test-types:
    description: 'JSON array of test types to run'
    required: true
  stack-config:
    description: 'Stack configuration to use'
    required: true
  provider:
    description: 'Provider to use for tests'
    required: true
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true
  run-vision-tests:
    description: 'Whether to run vision tests'
    required: false
    default: 'false'

runs:
  using: 'composite'
  steps:
    - name: Check Storage and Memory Available Before Tests
      if: ${{ always() }}
      shell: bash
      run: |
        free -h
        df -h

    - name: Run Integration Tests
      shell: bash
      run: |
        ./scripts/integration-tests.sh \
          --stack-config '${{ inputs.stack-config }}' \
          --provider '${{ inputs.provider }}' \
          --test-types '${{ inputs.test-types }}' \
          --inference-mode '${{ inputs.inference-mode }}' \
          ${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }}

    - name: Commit and push recordings
      if: ${{ inputs.inference-mode == 'record' }}
      shell: bash
      run: |
        echo "Checking for recording changes"
        git status --porcelain tests/integration/recordings/

        if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
          echo "New recordings detected, committing and pushing"
          git add tests/integration/recordings/

          if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
            git commit -m "Recordings update from CI (vision)"
          else
            git commit -m "Recordings update from CI"
          fi

          git fetch origin ${{ github.event.pull_request.head.ref }}
          git rebase origin/${{ github.event.pull_request.head.ref }}
          echo "Rebased successfully"
          git push origin HEAD:${{ github.event.pull_request.head.ref }}
          echo "Pushed successfully"
        else
          echo "No recording changes"
        fi

    - name: Write inference logs to file
      if: ${{ always() }}
      shell: bash
      run: |
        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true

    - name: Upload logs
      if: ${{ always() }}
      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
      with:
        name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
        path: |
          *.log
        retention-days: 1
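For orientation, a caller workflow wires this composite action in with `uses:` and the inputs defined above. A minimal sketch of such a job follows; the job name and the literal input values are illustrative assumptions, while the action path and input names come from the file above (the real workflows in this diff pin actions to full commit SHAs):

```yaml
jobs:
  integration-replay:                         # hypothetical job name
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4             # illustrative ref

      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: '["inference"]'         # illustrative JSON array of test directories
          stack-config: 'server:ci-tests'
          provider: 'ollama'
          inference-mode: 'replay'
          run-vision-tests: 'false'
```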
.github/actions/setup-ollama/action.yml (14 lines changed)

@@ -1,11 +1,23 @@
 name: Setup Ollama
 description: Start Ollama
+inputs:
+  run-vision-tests:
+    description: 'Run vision tests: "true" or "false"'
+    required: false
+    default: 'false'
 runs:
   using: "composite"
   steps:
     - name: Start Ollama
       shell: bash
       run: |
-        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
+        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+          image="ollama-with-vision-model"
+        else
+          image="ollama-with-models"
+        fi
+
+        echo "Starting Ollama with image: $image"
+        docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
         echo "Verifying Ollama status..."
         timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'
.github/actions/setup-test-environment/action.yml (new file, 51 lines)

@@ -0,0 +1,51 @@
name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'

inputs:
  python-version:
    description: 'Python version to use'
    required: true
  client-version:
    description: 'Client version (latest or published)'
    required: true
  provider:
    description: 'Provider to setup (ollama or vllm)'
    required: true
    default: 'ollama'
  run-vision-tests:
    description: 'Whether to setup provider for vision tests'
    required: false
    default: 'false'
  inference-mode:
    description: 'Inference mode (record or replay)'
    required: true

runs:
  using: 'composite'
  steps:
    - name: Install dependencies
      uses: ./.github/actions/setup-runner
      with:
        python-version: ${{ inputs.python-version }}
        client-version: ${{ inputs.client-version }}

    - name: Setup ollama
      if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-ollama
      with:
        run-vision-tests: ${{ inputs.run-vision-tests }}

    - name: Setup vllm
      if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
      uses: ./.github/actions/setup-vllm

    - name: Build Llama Stack
      shell: bash
      run: |
        uv run llama stack build --template ci-tests --image-type venv

    - name: Configure git for commits
      shell: bash
      run: |
        git config --local user.email "github-actions[bot]@users.noreply.github.com"
        git config --local user.name "github-actions[bot]"
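Note that both provider setup steps above are gated on `inference-mode == 'record'`, so a replay run never starts Ollama or vLLM. A hedged sketch of the two call patterns, with illustrative values (the action path and input names come from the file above):

```yaml
# Replay on CI: no provider container is started
- uses: ./.github/actions/setup-test-environment
  with:
    python-version: '3.12'
    client-version: 'latest'
    provider: 'ollama'
    run-vision-tests: 'false'
    inference-mode: 'replay'

# Recording run: the Ollama (or vLLM) provider is brought up first
- uses: ./.github/actions/setup-test-environment
  with:
    python-version: '3.12'
    client-version: 'latest'
    provider: 'ollama'
    run-vision-tests: 'true'
    inference-mode: 'record'
```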
.github/workflows/README.md (6 lines changed)

@@ -1,19 +1,19 @@
 # Llama Stack CI

-Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
+Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.

 | Name | File | Purpose |
 | ---- | ---- | ------- |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
-| Coverage Badge | [coverage-badge.yml](coverage-badge.yml) | Creates PR for updating the code coverage badge |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
 | Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
 | SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
-| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite with Ollama |
+| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
+| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
 | Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action |
 | Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module |
.github/workflows/coverage-badge.yml (deleted, 62 lines)

@@ -1,62 +0,0 @@
name: Coverage Badge

run-name: Creates PR for updating the code coverage badge

on:
  push:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/unit/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/unit-tests.yml'
      - '.github/workflows/coverage-badge.yml' # This workflow
  workflow_dispatch:

jobs:
  unit-tests:
    permissions:
      contents: write # for peter-evans/create-pull-request to create branch
      pull-requests: write # for peter-evans/create-pull-request to create a PR
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Run unit tests
        run: |
          ./scripts/unit-tests.sh

      - name: Coverage Badge
        uses: tj-actions/coverage-badge-py@1788babcb24544eb5bbb6e0d374df5d1e54e670f # v2.0.4

      - name: Verify Changed files
        uses: tj-actions/verify-changed-files@a1c6acee9df209257a246f2cc6ae8cb6581c1edf # v20.0.4
        id: verify-changed-files
        with:
          files: coverage.svg

      - name: Commit files
        if: steps.verify-changed-files.outputs.files_changed == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add coverage.svg
          git commit -m "Updated coverage.svg"

      - name: Create Pull Request
        if: steps.verify-changed-files.outputs.files_changed == 'true'
        uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          title: "ci: [Automatic] Coverage Badge Update"
          body: |
            This PR updates the coverage badge based on the latest coverage report.

            Automatically generated by the [workflow coverage-badge.yaml](.github/workflows/coverage-badge.yaml)
          delete-branch: true
.github/workflows/integration-tests.yml (127 lines changed)

@@ -1,20 +1,22 @@
-name: Integration Tests
+name: Integration Tests (Replay)

-run-name: Run the integration test suite with Ollama
+run-name: Run the integration test suite from tests/integration in replay mode

 on:
   push:
     branches: [ main ]
   pull_request:
     branches: [ main ]
+    types: [opened, synchronize, reopened]
     paths:
       - 'llama_stack/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
       - '.github/workflows/integration-tests.yml' # This workflow
       - '.github/actions/setup-ollama/action.yml'
+      - '.github/actions/setup-test-environment/action.yml'
+      - '.github/actions/run-and-record-tests/action.yml'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC

@@ -31,129 +33,64 @@ on:
         default: 'ollama'

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  # Skip concurrency for pushes to main - each commit should be tested independently
+  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
   cancel-in-progress: true

 jobs:
   discover-tests:
     runs-on: ubuntu-latest
     outputs:
-      test-type: ${{ steps.generate-matrix.outputs.test-type }}
+      test-types: ${{ steps.generate-test-types.outputs.test-types }}

     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - name: Generate test matrix
-        id: generate-matrix
+      - name: Generate test types
+        id: generate-test-types
         run: |
           # Get test directories dynamically, excluding non-test directories
+          # NOTE: we are excluding post_training since the tests take too long
           TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
-            grep -Ev "^(__pycache__|fixtures|test_cases)$" |
+            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
             sort | jq -R -s -c 'split("\n")[:-1]')
-          echo "test-type=$TEST_TYPES" >> $GITHUB_OUTPUT
+          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT

-  test-matrix:
+  run-replay-mode-tests:
     needs: discover-tests
     runs-on: ubuntu-latest
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}

     strategy:
       fail-fast: false
       matrix:
-        test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
         client-type: [library, server]
         # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
         provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
-        python-version: ["3.12", "3.13"]
-        client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        exclude: # TODO: look into why these tests are failing and fix them
-          - provider: vllm
-            test-type: safety
-          - provider: vllm
-            test-type: post_training
-          - provider: vllm
-            test-type: tool_runtime
+        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
+        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
+        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
+        run-vision-tests: [true, false]

     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - name: Install dependencies
-        uses: ./.github/actions/setup-runner
+      - name: Setup test environment
+        uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}
           client-version: ${{ matrix.client-version }}
+          provider: ${{ matrix.provider }}
+          run-vision-tests: ${{ matrix.run-vision-tests }}
+          inference-mode: 'replay'

-      - name: Setup ollama
-        if: ${{ matrix.provider == 'ollama' }}
-        uses: ./.github/actions/setup-ollama
-
-      - name: Setup vllm
-        if: ${{ matrix.provider == 'vllm' }}
-        uses: ./.github/actions/setup-vllm
-
-      - name: Build Llama Stack
-        run: |
-          uv run llama stack build --template ci-tests --image-type venv
-
-      - name: Check Storage and Memory Available Before Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Run Integration Tests
-        env:
-          LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
-        # Use 'shell' to get pipefail behavior
-        # https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
-        # TODO: write a precommit hook to detect if a test contains a pipe but does not use 'shell: bash'
-        shell: bash
-        run: |
-          if [ "${{ matrix.client-type }}" == "library" ]; then
-            stack_config="ci-tests"
-          else
-            stack_config="server:ci-tests"
-          fi
-
-          EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
-          if [ "${{ matrix.provider }}" == "ollama" ]; then
-            export OLLAMA_URL="http://0.0.0.0:11434"
-            export TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16
-            export SAFETY_MODEL="ollama/llama-guard3:1b"
-            EXTRA_PARAMS="--safety-shield=llama-guard"
-          else
-            export VLLM_URL="http://localhost:8000/v1"
-            export TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct
-            # TODO: remove the not(test_inference_store_tool_calls) once we can get the tool called consistently
-            EXTRA_PARAMS=
-            EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
-          fi
-
-          uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-            -k "not( ${EXCLUDE_TESTS} )" \
-            --text-model=$TEXT_MODEL \
-            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
-            --color=yes ${EXTRA_PARAMS} \
-            --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
-
-      - name: Check Storage and Memory Available After Tests
-        if: ${{ always() }}
-        run: |
-          free -h
-          df -h
-
-      - name: Write inference logs to file
-        if: ${{ always() }}
-        run: |
-          sudo docker logs ollama > ollama.log || true
-          sudo docker logs vllm > vllm.log || true
-
-      - name: Upload all logs to artifacts
-        if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      - name: Run tests
+        uses: ./.github/actions/run-and-record-tests
         with:
-          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
-          path: |
-            *.log
-          retention-days: 1
+          test-types: ${{ needs.discover-tests.outputs.test-types }}
+          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
+          provider: ${{ matrix.provider }}
+          inference-mode: 'replay'
+          run-vision-tests: ${{ matrix.run-vision-tests }}

.github/workflows/integration-vector-io-tests.yml

@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector"]
+        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector", "remote::weaviate", "remote::qdrant"]
         python-version: ["3.12", "3.13"]
       fail-fast: false # we want to run all tests regardless of failure

@@ -48,6 +48,14 @@
             -e ANONYMIZED_TELEMETRY=FALSE \
             chromadb/chroma:latest

+      - name: Setup Weaviate
+        if: matrix.vector-io-provider == 'remote::weaviate'
+        run: |
+          docker run --rm -d --pull always \
+            --name weaviate \
+            -p 8080:8080 -p 50051:50051 \
+            cr.weaviate.io/semitechnologies/weaviate:1.32.0
+
       - name: Start PGVector DB
         if: matrix.vector-io-provider == 'remote::pgvector'
         run: |

@@ -78,6 +86,29 @@
           PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
             -c "CREATE EXTENSION IF NOT EXISTS vector;"

+      - name: Setup Qdrant
+        if: matrix.vector-io-provider == 'remote::qdrant'
+        run: |
+          docker run --rm -d --pull always \
+            --name qdrant \
+            -p 6333:6333 \
+            qdrant/qdrant
+
+      - name: Wait for Qdrant to be ready
+        if: matrix.vector-io-provider == 'remote::qdrant'
+        run: |
+          echo "Waiting for Qdrant to be ready..."
+          for i in {1..30}; do
+            if curl -s http://localhost:6333/collections | grep -q '"status":"ok"'; then
+              echo "Qdrant is ready!"
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Qdrant failed to start"
+          docker logs qdrant
+          exit 1
+
       - name: Wait for ChromaDB to be ready
         if: matrix.vector-io-provider == 'remote::chromadb'
         run: |

@@ -93,6 +124,21 @@
           docker logs chromadb
           exit 1

+      - name: Wait for Weaviate to be ready
+        if: matrix.vector-io-provider == 'remote::weaviate'
+        run: |
+          echo "Waiting for Weaviate to be ready..."
+          for i in {1..30}; do
+            if curl -s http://localhost:8080 | grep -q "https://weaviate.io/developers/weaviate/current/"; then
+              echo "Weaviate is ready!"
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Weaviate failed to start"
+          docker logs weaviate
+          exit 1
+
       - name: Build Llama Stack
         run: |
           uv run llama stack build --template ci-tests --image-type venv

@@ -113,6 +159,10 @@
           PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
           PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
           PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
+          ENABLE_QDRANT: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'true' || '' }}
+          QDRANT_URL: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'http://localhost:6333' || '' }}
+          ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
+          WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
         run: |
           uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
             tests/integration/vector_io \

@@ -134,6 +184,11 @@
         run: |
           docker logs chromadb > chromadb.log

+      - name: Write Qdrant logs to file
+        if: ${{ always() && matrix.vector-io-provider == 'remote::qdrant' }}
+        run: |
+          docker logs qdrant > qdrant.log
+
       - name: Upload all logs to artifacts
         if: ${{ always() }}
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
.github/workflows/pre-commit.yml (38 lines changed)

@@ -14,10 +14,18 @@ concurrency:
 jobs:
   pre-commit:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
     steps:
       - name: Checkout code
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # For dependabot PRs, we need to checkout with a token that can push changes
+          token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }}
+          # Fetch full history for dependabot PRs to allow commits
+          fetch-depth: ${{ github.actor == 'dependabot[bot]' && 0 || 1 }}

       - name: Set up Python
         uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0

@@ -29,15 +37,45 @@ jobs:
           .pre-commit-config.yaml

       - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        continue-on-error: true
         env:
           SKIP: no-commit-to-branch
           RUFF_OUTPUT_FORMAT: github

+      - name: Debug
+        run: |
+          echo "github.ref: ${{ github.ref }}"
+          echo "github.actor: ${{ github.actor }}"
+
+      - name: Commit changes for dependabot PRs
+        if: github.actor == 'dependabot[bot]'
+        run: |
+          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
+            git config --local user.email "github-actions[bot]@users.noreply.github.com"
+            git config --local user.name "github-actions[bot]"
+
+            # Ensure we're on the correct branch
+            git checkout -B ${{ github.head_ref }}
+            git add -A
+            git commit -m "Apply pre-commit fixes"
+
+            # Pull latest changes from the PR branch and rebase our commit on top
+            git pull --rebase origin ${{ github.head_ref }}
+
+            # Push to the PR branch
+            git push origin ${{ github.head_ref }}
+            echo "Pre-commit fixes committed and pushed"
+          else
+            echo "No changes to commit"
+          fi
+
       - name: Verify if there are any diff files after pre-commit
+        if: github.actor != 'dependabot[bot]'
         run: |
           git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

       - name: Verify if there are any new files after pre-commit
+        if: github.actor != 'dependabot[bot]'
         run: |
           unstaged_files=$(git ls-files --others --exclude-standard)
           if [ -n "$unstaged_files" ]; then
.github/workflows/providers-build.yml (42 lines changed)

@@ -9,20 +9,20 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/templates/**'
+      - 'llama_stack/distributions/**'
       - 'pyproject.toml'

   pull_request:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
      - '.github/workflows/providers-build.yml'
-      - 'llama_stack/templates/**'
+      - 'llama_stack/distributions/**'
       - 'pyproject.toml'

 concurrency:

@@ -33,23 +33,23 @@ jobs:
   generate-matrix:
     runs-on: ubuntu-latest
     outputs:
-      templates: ${{ steps.set-matrix.outputs.templates }}
+      distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - name: Generate Template List
+      - name: Generate Distribution List
         id: set-matrix
         run: |
-          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
-          echo "templates=$templates" >> "$GITHUB_OUTPUT"
+          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          echo "distros=$distros" >> "$GITHUB_OUTPUT"

   build:
     needs: generate-matrix
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
+        distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
         image-type: [venv, container]
       fail-fast: false # We want to run all jobs even if some fail

@@ -62,13 +62,13 @@ jobs:
       - name: Print build dependencies
         run: |
-          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+          uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only

       - name: Run Llama Stack Build
         run: |
           # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
           # LLAMA_STACK_DIR is set to the current directory so we are building from the source
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test

       - name: Print dependencies in the image
         if: matrix.image-type == 'venv'

@@ -99,16 +99,16 @@ jobs:
       - name: Build a single provider
         run: |
-          yq -i '.image_type = "container"' llama_stack/templates/ci-tests/build.yaml
-          yq -i '.image_name = "test"' llama_stack/templates/ci-tests/build.yaml
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
+          yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
+          yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml

       - name: Inspect the container image entrypoint
         run: |
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi

@@ -122,27 +122,27 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner

-      - name: Pin template to UBI9 base
+      - name: Pin distribution to UBI9 base
         run: |
           yq -i '
           .image_type = "container" |
           .image_name = "ubi9-test" |
           .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/templates/ci-tests/build.yaml
+          ' llama_stack/distributions/ci-tests/build.yaml

       - name: Build dev container (UBI9)
         env:
           USE_COPY_NOT_MOUNT: "true"
           LLAMA_STACK_DIR: "."
         run: |
-          uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
+          uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml

       - name: Inspect UBI9 image
         run: |
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
.github/workflows/record-integration-tests.yml (new file, 109 lines)

@@ -0,0 +1,109 @@
name: Integration Tests (Record)

run-name: Run the integration test suite from tests/integration

on:
  pull_request:
    branches: [ main ]
    types: [opened, synchronize, labeled]
    paths:
      - 'llama_stack/**'
      - 'tests/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - '.github/workflows/record-integration-tests.yml' # This workflow
      - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
      - '.github/actions/run-and-record-tests/action.yml'
  workflow_dispatch:
    inputs:
      test-provider:
        description: 'Test against a specific provider'
        type: string
        default: 'ollama'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  discover-tests:
    if: contains(github.event.pull_request.labels.*.name, 're-record-tests') ||
        contains(github.event.pull_request.labels.*.name, 're-record-vision-tests')
    runs-on: ubuntu-latest
    outputs:
      test-types: ${{ steps.generate-test-types.outputs.test-types }}
      matrix-modes: ${{ steps.generate-test-types.outputs.matrix-modes }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Generate test types
        id: generate-test-types
        run: |
          # Get test directories dynamically, excluding non-test directories
          TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
            grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
            sort | jq -R -s -c 'split("\n")[:-1]')
          echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT

          labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
          echo "labels=$labels"

          modes_array=()
          if [[ $labels == *"re-record-vision-tests"* ]]; then
            modes_array+=("vision")
          fi
          if [[ $labels == *"re-record-tests"* ]]; then
            modes_array+=("non-vision")
          fi

          # Convert to JSON array
          if [ ${#modes_array[@]} -eq 0 ]; then
            matrix_modes="[]"
          else
            matrix_modes=$(printf '%s\n' "${modes_array[@]}" | jq -R -s -c 'split("\n")[:-1]')
          fi
          echo "matrix_modes=$matrix_modes"
          echo "matrix-modes=$matrix_modes" >> $GITHUB_OUTPUT

        env:
          GH_TOKEN: ${{ github.token }}

  record-tests:
    needs: discover-tests
    runs-on: ubuntu-latest

    permissions:
      contents: write

    strategy:
      fail-fast: false
      matrix:
        mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ github.event.pull_request.head.ref }}
          fetch-depth: 0

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: "3.12"  # Use single Python version for recording
          client-version: "latest"
          provider: ${{ inputs.test-provider || 'ollama' }}
          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
          inference-mode: 'record'

      - name: Run and record tests
        uses: ./.github/actions/run-and-record-tests
        with:
          test-types: ${{ needs.discover-tests.outputs.test-types }}
          stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
          provider: ${{ inputs.test-provider || 'ollama' }}
          inference-mode: 'record'
          run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
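To make the discovery step above concrete, here is a hedged sketch of what it emits. The commands are the ones used in the step; the directory names and labels in the sample output are hypothetical, not captured from an actual run:

```bash
# Test types: top-level directories under tests/integration, as a compact JSON array
find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
  grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
  sort | jq -R -s -c 'split("\n")[:-1]'
# -> ["agents","inference","safety","vector_io"]   (illustrative)

# Matrix modes: derived from the PR labels; with both re-record labels applied,
# the same printf/jq conversion used in the step would yield
printf '%s\n' vision non-vision | jq -R -s -c 'split("\n")[:-1]'
# -> ["vision","non-vision"]
```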
.github/workflows/test-external-provider-module.yml

@@ -12,11 +12,13 @@ on:
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
-      - 'requirements.txt'
+      - 'tests/external/*'
       - '.github/workflows/test-external-provider-module.yml' # This workflow

 jobs:
   test-external-providers-from-module:
+    # This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
+    if: false
     runs-on: ubuntu-latest
     strategy:
       matrix:

@@ -46,7 +48,7 @@ jobs:
       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'

.github/workflows/test-external.yml (5 lines changed)

@@ -13,6 +13,7 @@ on:
       - 'uv.lock'
       - 'pyproject.toml'
       - 'requirements.txt'
+      - 'tests/external/*'
       - '.github/workflows/test-external.yml' # This workflow

 jobs:

@@ -42,11 +43,11 @@ jobs:
       - name: Print distro dependencies
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only

       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'

.github/workflows/unit-tests.yml (2 lines changed)

@@ -35,6 +35,8 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner
+        with:
+          python-version: ${{ matrix.python }}

       - name: Run unit tests
         run: |
.pre-commit-config.yaml

@@ -19,7 +19,6 @@ repos:
       - id: check-yaml
         args: ["--unsafe"]
       - id: detect-private-key
-      - id: requirements-txt-fixer
       - id: mixed-line-ending
         args: [--fix=lf] # Forces to replace line ending by LF (line feed)
       - id: check-executables-have-shebangs

@@ -56,14 +55,6 @@ repos:
     rev: 0.7.20
     hooks:
       - id: uv-lock
-      - id: uv-export
-        args: [
-          "--frozen",
-          "--no-hashes",
-          "--no-emit-project",
-          "--no-default-groups",
-          "--output-file=requirements.txt"
-        ]

   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.16.1

@@ -451,7 +451,7 @@ GenAI application developers need more than just an LLM - they need to integrate

 Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety.

-With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.
+With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.

 ## Release
 After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.

CONTRIBUTING.md

@@ -164,7 +164,7 @@ Some tips about common tasks you work on while contributing to Llama Stack:

 ### Using `llama stack build`

-Building a stack image (conda / docker) will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
+Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.

 Example:
 ```bash

@@ -172,7 +172,7 @@ cd work/
 git clone https://github.com/meta-llama/llama-stack.git
 git clone https://github.com/meta-llama/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
+LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
 ```

 ### Updating distribution configurations

MANIFEST.in

@@ -1,9 +1,9 @@
 include pyproject.toml
 include llama_stack/models/llama/llama3/tokenizer.model
 include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/distribution/*.sh
+include llama_stack/core/*.sh
 include llama_stack/cli/scripts/*.sh
-include llama_stack/templates/*/*.yaml
+include llama_stack/distributions/*/*.yaml
 include llama_stack/providers/tests/test_cases/inference/*.json
 include llama_stack/models/llama/*/*.md
 include llama_stack/tests/integration/*.jpg
|
51
README.md
51
README.md
|
@ -6,7 +6,6 @@
|
||||||
[](https://discord.gg/llama-stack)
|
[](https://discord.gg/llama-stack)
|
||||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
|
[](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
|
||||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
|
[](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
|
||||||

|
|
||||||
|
|
||||||
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
|
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
|
||||||
|
|
||||||
|
@ -112,29 +111,33 @@ Here is a list of the various API providers and available distributions that can
|
||||||
Please checkout for [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html)
|
Please checkout for [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html)
|
||||||
|
|
||||||
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|
||||||
|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
|
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
|
||||||
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
|
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
|
||||||
| Cerebras | Hosted | | ✅ | | | | | | |
|
| Cerebras | Hosted | | ✅ | | | | | | |
|
||||||
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
|
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
|
||||||
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
|
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
|
||||||
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
|
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
|
||||||
| Groq | Hosted | | ✅ | | | | | | |
|
| Groq | Hosted | | ✅ | | | | | | |
|
||||||
| Ollama | Single Node | | ✅ | | | | | | |
|
| Ollama | Single Node | | ✅ | | | | | | |
|
||||||
| TGI | Hosted/Single Node | | ✅ | | | | | | |
|
| TGI | Hosted/Single Node | | ✅ | | | | | | |
|
||||||
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
|
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
|
||||||
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
|
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
|
||||||
| PG Vector | Single Node | | | ✅ | | | | | |
|
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
|
||||||
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
|
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
|
||||||
| vLLM | Single Node | | ✅ | | | | | | |
|
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
|
||||||
| OpenAI | Hosted | | ✅ | | | | | | |
|
| SQLite-vec | Single Node | | | ✅ | | | | | |
|
||||||
| Anthropic | Hosted | | ✅ | | | | | | |
|
| PG Vector | Single Node | | | ✅ | | | | | |
|
||||||
| Gemini | Hosted | | ✅ | | | | | | |
|
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
|
||||||
| WatsonX | Hosted | | ✅ | | | | | | |
|
| vLLM | Single Node | | ✅ | | | | | | |
|
||||||
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
|
| OpenAI | Hosted | | ✅ | | | | | | |
|
||||||
| TorchTune | Single Node | | | | | | ✅ | | |
|
| Anthropic | Hosted | | ✅ | | | | | | |
|
||||||
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
|
| Gemini | Hosted | | ✅ | | | | | | |
|
||||||
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
|
| WatsonX | Hosted | | ✅ | | | | | | |
|
||||||
|
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
|
||||||
|
| TorchTune | Single Node | | | | | | ✅ | | |
|
||||||
|
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
|
||||||
|
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
|
||||||
|
|
||||||
> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
|
> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
|
||||||
|
|
||||||
|
|
2210 docs/_static/llama-stack-spec.html (vendored): File diff suppressed because it is too large
1351 docs/_static/llama-stack-spec.yaml (vendored): File diff suppressed because it is too large
|
@ -123,7 +123,7 @@
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
|
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
|
||||||
"!uv run --with llama-stack llama stack build --template together --image-type venv \n",
|
"!uv run --with llama-stack llama stack build --distro together --image-type venv \n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run_llama_stack_server_background():\n",
|
"def run_llama_stack_server_background():\n",
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||||
|
@ -165,7 +165,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -233,7 +233,7 @@
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this command installs all the dependencies needed for the llama stack server \n",
|
"# this command installs all the dependencies needed for the llama stack server \n",
|
||||||
"!uv run --with llama-stack llama stack build --template meta-reference-gpu --image-type venv \n",
|
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv \n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run_llama_stack_server_background():\n",
|
"def run_llama_stack_server_background():\n",
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||||
|
@ -275,7 +275,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -223,7 +223,7 @@
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this command installs all the dependencies needed for the llama stack server \n",
|
"# this command installs all the dependencies needed for the llama stack server \n",
|
||||||
"!uv run --with llama-stack llama stack build --template llama_api --image-type venv \n",
|
"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv \n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run_llama_stack_server_background():\n",
|
"def run_llama_stack_server_background():\n",
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||||
|
@ -265,7 +265,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -37,7 +37,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"To learn more about torchtune: https://github.com/pytorch/torchtune\n",
|
"To learn more about torchtune: https://github.com/pytorch/torchtune\n",
|
||||||
"\n",
|
"\n",
|
||||||
"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/experimental-post-training) as the distribution template\n",
|
"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/experimental-post-training) as the distribution template\n",
|
||||||
"\n",
|
"\n",
|
||||||
"#### 0.0. Prerequisite: Have an OpenAI API key\n",
|
"#### 0.0. Prerequisite: Have an OpenAI API key\n",
|
||||||
"In this showcase, we will use [braintrust](https://www.braintrust.dev/) as scoring provider for eval and it uses OpenAI model as judge model for scoring. So, you need to get an API key from [OpenAI developer platform](https://platform.openai.com/docs/overview).\n",
|
"In this showcase, we will use [braintrust](https://www.braintrust.dev/) as scoring provider for eval and it uses OpenAI model as judge model for scoring. So, you need to get an API key from [OpenAI developer platform](https://platform.openai.com/docs/overview).\n",
|
||||||
|
@ -2864,7 +2864,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"!llama stack build --template experimental-post-training --image-type venv --image-name __system__"
|
"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3216,19 +3216,19 @@
|
||||||
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
"INFO:datasets:Duckdb version 1.1.3 available.\n",
|
||||||
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
|
||||||
"INFO:datasets:JAX version 0.4.33 available.\n",
|
"INFO:datasets:JAX version 0.4.33 available.\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
|
||||||
"INFO:llama_stack.distribution.stack:\n"
|
"INFO:llama_stack.core.stack:\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -3448,7 +3448,7 @@
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
]
|
]
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# NBVAL_SKIP\n",
|
"# NBVAL_SKIP\n",
|
||||||
"!pip install -U llama-stack\n",
|
"!pip install -U llama-stack\n",
|
||||||
"!UV_SYSTEM_PYTHON=1 llama stack build --template fireworks --image-type venv"
|
"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -48,7 +48,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -57,7 +57,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# NBVAL_SKIP\n",
|
"# NBVAL_SKIP\n",
|
||||||
"!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
|
"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -661,7 +661,7 @@
|
||||||
"except ImportError:\n",
|
"except ImportError:\n",
|
||||||
" print(\"Not in Google Colab environment\")\n",
|
" print(\"Not in Google Colab environment\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
"client = LlamaStackAsLibraryClient(\"together\")\n",
|
||||||
"_ = client.initialize()"
|
"_ = client.initialize()"
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack_client import LlamaStackClient, Agent\n",
|
"from llama_stack_client import LlamaStackClient, Agent\n",
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"from rich.pretty import pprint\n",
|
"from rich.pretty import pprint\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import uuid\n",
|
"import uuid\n",
|
||||||
|
|
|
@ -92,7 +92,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"```bash\n",
|
"```bash\n",
|
||||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
|
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||||
"```"
|
"```"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -194,7 +194,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -81,7 +81,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"```bash\n",
|
"```bash\n",
|
||||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
|
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||||
"```"
|
"```"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
|
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
|
||||||
"\n",
|
"\n",
|
||||||
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
|
||||||
"client.initialize()"
|
"client.initialize()"
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
|
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/core/server/endpoints.py` using the `generate.py` utility.
|
||||||
|
|
|
@ -17,7 +17,7 @@ import fire
|
||||||
import ruamel.yaml as yaml
|
import ruamel.yaml as yaml
|
||||||
|
|
||||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
|
||||||
from llama_stack.distribution.stack import LlamaStack # noqa: E402
|
from llama_stack.core.stack import LlamaStack # noqa: E402
|
||||||
|
|
||||||
from .pyopenapi.options import Options # noqa: E402
|
from .pyopenapi.options import Options # noqa: E402
|
||||||
from .pyopenapi.specification import Info, Server # noqa: E402
|
from .pyopenapi.specification import Info, Server # noqa: E402
|
||||||
|
|
|
@ -12,7 +12,7 @@ from typing import TextIO
|
||||||
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
||||||
|
|
||||||
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
||||||
from llama_stack.distribution.resolver import api_protocol_map
|
from llama_stack.core.resolver import api_protocol_map
|
||||||
|
|
||||||
from .generator import Generator
|
from .generator import Generator
|
||||||
from .options import Options
|
from .options import Options
|
||||||
|
|
|
@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
|
||||||
|
|
||||||
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
|
||||||
|
|
||||||
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
|
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/core/server/server.py) repository.
|
||||||
|
|
||||||
## Limitations
|
## Limitations
|
||||||
|
|
||||||
|
|
|
@ -145,12 +145,12 @@
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
||||||
"!uv run --with llama-stack llama stack build --template starter --image-type venv\n",
|
"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run_llama_stack_server_background():\n",
|
"def run_llama_stack_server_background():\n",
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||||
" process = subprocess.Popen(\n",
|
" process = subprocess.Popen(\n",
|
||||||
" f\"uv run --with llama-stack llama stack run starter --image-type venv --env INFERENCE_MODEL=llama3.2:3b\",\n",
|
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv",
|
||||||
" shell=True,\n",
|
" shell=True,\n",
|
||||||
" stdout=log_file,\n",
|
" stdout=log_file,\n",
|
||||||
" stderr=log_file,\n",
|
" stderr=log_file,\n",
|
||||||
|
@ -187,7 +187,7 @@
|
||||||
"# use this helper if needed to kill the server \n",
|
"# use this helper if needed to kill the server \n",
|
||||||
"def kill_llama_stack_server():\n",
|
"def kill_llama_stack_server():\n",
|
||||||
" # Kill any existing llama stack server processes\n",
|
" # Kill any existing llama stack server processes\n",
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::meta-reference
|
# inline::meta-reference
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# remote::nvidia
|
# remote::nvidia
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -43,7 +43,7 @@ We have built-in functionality to run the supported open-benchmarks using llama-
|
||||||
|
|
||||||
Spin up llama stack server with 'open-benchmark' template
|
Spin up llama stack server with 'open-benchmark' template
|
||||||
```
|
```
|
||||||
llama stack run llama_stack/templates/open-benchmark/run.yaml
|
llama stack run llama_stack/distributions/open-benchmark/run.yaml
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ To use the HF SFTTrainer in your Llama Stack project, follow these steps:
|
||||||
You can access the HuggingFace trainer via the `ollama` distribution:
|
You can access the HuggingFace trainer via the `ollama` distribution:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --template starter --image-type venv
|
llama stack build --distro starter --image-type venv
|
||||||
llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml
|
llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::huggingface
|
# inline::huggingface
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::torchtune
|
# inline::torchtune
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# remote::nvidia
|
# remote::nvidia
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::basic
|
# inline::basic
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::braintrust
|
# inline::braintrust
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
---
|
||||||
|
orphan: true
|
||||||
|
---
|
||||||
|
|
||||||
# inline::llm-as-judge
|
# inline::llm-as-judge
|
||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
|
@ -355,7 +355,7 @@ server:
|
||||||
8. Run the server:
|
8. Run the server:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
|
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
9. Test the API:
|
9. Test the API:
|
||||||
|
|
|
@ -97,11 +97,11 @@ To start the Llama Stack Playground, run the following commands:
|
||||||
1. Start up the Llama Stack API server
|
1. Start up the Llama Stack API server
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --template together --image-type conda
|
llama stack build --distro together --image-type venv
|
||||||
llama stack run together
|
llama stack run together
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Start Streamlit UI
|
2. Start Streamlit UI
|
||||||
```bash
|
```bash
|
||||||
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
|
uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
|
||||||
```
|
```
|
||||||
|
|
|
@ -11,4 +11,5 @@ See the [Adding a New API Provider](new_api_provider.md) which describes how to
|
||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
new_api_provider
|
new_api_provider
|
||||||
|
testing
|
||||||
```
|
```
|
||||||
|
|
|
@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla
|
||||||
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
|
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
|
||||||
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
|
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
|
||||||
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
|
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
|
||||||
- Update any distribution {repopath}`Templates::llama_stack/templates/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
|
- Update any distribution {repopath}`Templates::llama_stack/distributions/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
|
||||||
|
|
||||||
|
|
||||||
Here are some example PRs to help you get started:
|
Here are some example PRs to help you get started:
|
||||||
|
@ -52,7 +52,7 @@ def get_base_url(self) -> str:
|
||||||
|
|
||||||
## Testing the Provider
|
## Testing the Provider
|
||||||
|
|
||||||
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --template together`.
|
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
|
||||||
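As a minimal sketch of that flow (the pytest target below is an assumption; the sections that follow describe the actual test workflows):

```bash
# Install the dependencies for the distribution under test into a venv
llama stack build --distro together --image-type venv

# Then run the integration suite against it (path assumed; narrow it to the APIs you changed)
pytest -sv tests/integration
```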
|
|
||||||
### 1. Integration Testing
|
### 1. Integration Testing
|
||||||
|
|
||||||
|
|
|
@ -174,7 +174,7 @@ spec:
|
||||||
- name: llama-stack
|
- name: llama-stack
|
||||||
image: localhost/llama-stack-run-k8s:latest
|
image: localhost/llama-stack-run-k8s:latest
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5000
|
- containerPort: 5000
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -47,26 +47,26 @@ pip install -e .
|
||||||
```
|
```
|
||||||
Use the CLI to build your distribution.
|
Use the CLI to build your distribution.
|
||||||
The main points to consider are:
|
The main points to consider are:
|
||||||
1. **Image Type** - Do you want a Conda / venv environment or a Container (eg. Docker)
|
1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
|
||||||
2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
|
2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
|
||||||
3. **Config** - Do you want to use a pre-existing config file to build your distribution?
|
3. **Config** - Do you want to use a pre-existing config file to build your distribution?
|
||||||
|
|
||||||
```
|
```
|
||||||
llama stack build -h
|
llama stack build -h
|
||||||
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--list-templates] [--image-type {conda,container,venv}] [--image-name IMAGE_NAME] [--print-deps-only] [--run]
|
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--list-templates] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only] [--run]
|
||||||
|
|
||||||
Build a Llama stack container
|
Build a Llama stack container
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
|
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
|
||||||
be prompted to enter information interactively (default: None)
|
be prompted to enter information interactively (default: None)
|
||||||
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
||||||
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
||||||
--image-type {conda,container,venv}
|
--image-type {container,venv}
|
||||||
Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
|
Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
|
||||||
--image-name IMAGE_NAME
|
--image-name IMAGE_NAME
|
||||||
[for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active environment will be used if
|
[for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if
|
||||||
found. (default: None)
|
found. (default: None)
|
||||||
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
|
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
|
||||||
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
|
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
|
||||||
|
@ -141,7 +141,7 @@ You may then pick a template to build your distribution with providers fitted to
|
||||||
|
|
||||||
For example, to build a distribution with TGI as the inference provider, you can run:
|
For example, to build a distribution with TGI as the inference provider, you can run:
|
||||||
```
|
```
|
||||||
$ llama stack build --template starter
|
$ llama stack build --distro starter
|
||||||
...
|
...
|
||||||
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
|
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
|
||||||
```
|
```
|
||||||
|
@ -159,7 +159,7 @@ It would be best to start with a template and understand the structure of the co
|
||||||
llama stack build
|
llama stack build
|
||||||
|
|
||||||
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
|
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
|
||||||
> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
|
> Enter the image type you want your Llama Stack to be built as (container or venv): venv
|
||||||
|
|
||||||
Llama Stack is composed of several APIs working together. Let's select
|
Llama Stack is composed of several APIs working together. Let's select
|
||||||
the provider types (implementations) you want to use for these APIs.
|
the provider types (implementations) you want to use for these APIs.
|
||||||
|
@ -184,10 +184,10 @@ You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack
|
||||||
:::{tab-item} Building from a pre-existing build config file
|
:::{tab-item} Building from a pre-existing build config file
|
||||||
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
|
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
|
||||||
|
|
||||||
- The config file will be of contents like the ones in `llama_stack/templates/*build.yaml`.
|
- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
|
||||||
|
|
||||||
```
|
```
|
||||||
llama stack build --config llama_stack/templates/starter/build.yaml
|
llama stack build --config llama_stack/distributions/starter/build.yaml
|
||||||
```
|
```
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
@ -253,11 +253,11 @@ Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podm
|
||||||
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
||||||
|
|
||||||
```
|
```
|
||||||
llama stack build --template starter --image-type container
|
llama stack build --distro starter --image-type container
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
```
|
||||||
$ llama stack build --template starter --image-type container
|
$ llama stack build --distro starter --image-type container
|
||||||
...
|
...
|
||||||
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
|
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
|
||||||
...
|
...
|
||||||
|
@ -312,7 +312,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
|
||||||
```
|
```
|
||||||
llama stack run -h
|
llama stack run -h
|
||||||
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
|
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
|
||||||
[--image-type {conda,venv}] [--enable-ui]
|
[--image-type {venv}] [--enable-ui]
|
||||||
[config | template]
|
[config | template]
|
||||||
|
|
||||||
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
||||||
|
@ -326,8 +326,8 @@ options:
|
||||||
--image-name IMAGE_NAME
|
--image-name IMAGE_NAME
|
||||||
Name of the image to run. Defaults to the current environment (default: None)
|
Name of the image to run. Defaults to the current environment (default: None)
|
||||||
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
|
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
|
||||||
--image-type {conda,venv}
|
--image-type {venv}
|
||||||
Image Type used during the build. This can be either conda or venv. (default: None)
|
Image Type used during the build. This should be venv. (default: None)
|
||||||
--enable-ui Start the UI server (default: False)
|
--enable-ui Start the UI server (default: False)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -342,9 +342,6 @@ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-
|
||||||
|
|
||||||
# Start using a venv
|
# Start using a venv
|
||||||
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
||||||
|
|
||||||
# Start using a conda environment
|
|
||||||
llama stack run --image-type conda ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
|
@ -10,7 +10,6 @@ The default `run.yaml` files generated by templates are starting points for your
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
version: 2
|
version: 2
|
||||||
conda_env: ollama
|
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
- inference
|
- inference
|
||||||
|
|
|
@ -6,14 +6,14 @@ This avoids the overhead of setting up a server.
|
||||||
```bash
|
```bash
|
||||||
# setup
|
# setup
|
||||||
uv pip install llama-stack
|
uv pip install llama-stack
|
||||||
llama stack build --template starter --image-type venv
|
llama stack build --distro starter --image-type venv
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
from llama_stack.core.library_client import LlamaStackAsLibraryClient
|
||||||
|
|
||||||
client = LlamaStackAsLibraryClient(
|
client = LlamaStackAsLibraryClient(
|
||||||
"ollama",
|
"starter",
|
||||||
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
|
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
|
||||||
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
|
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
|
||||||
)
|
)
|
||||||
|
|
|
@ -9,6 +9,7 @@ This section provides an overview of the distributions available in Llama Stack.
|
||||||
list_of_distributions
|
list_of_distributions
|
||||||
building_distro
|
building_distro
|
||||||
customizing_run_yaml
|
customizing_run_yaml
|
||||||
|
starting_llama_stack_server
|
||||||
importing_as_library
|
importing_as_library
|
||||||
configuration
|
configuration
|
||||||
```
|
```
|
||||||
|
|
|
@ -34,6 +34,13 @@ data:
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
config:
|
config:
|
||||||
url: ${env.CHROMADB_URL:=}
|
url: ${env.CHROMADB_URL:=}
|
||||||
|
kvstore:
|
||||||
|
type: postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
value: "${SAFETY_MODEL}"
|
value: "${SAFETY_MODEL}"
|
||||||
- name: TAVILY_SEARCH_API_KEY
|
- name: TAVILY_SEARCH_API_KEY
|
||||||
value: "${TAVILY_SEARCH_API_KEY}"
|
value: "${TAVILY_SEARCH_API_KEY}"
|
||||||
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8321
|
- containerPort: 8321
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|
|
@ -31,6 +31,13 @@ providers:
|
||||||
provider_type: remote::chromadb
|
provider_type: remote::chromadb
|
||||||
config:
|
config:
|
||||||
url: ${env.CHROMADB_URL:=}
|
url: ${env.CHROMADB_URL:=}
|
||||||
|
kvstore:
|
||||||
|
type: postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
|
|
|
@ -56,10 +56,10 @@ Breaking down the demo app, this section will show the core pieces that are used
|
||||||
### Setup Remote Inferencing
|
### Setup Remote Inferencing
|
||||||
Start a Llama Stack server on localhost. Here is an example of how you can do this using the fireworks.ai distribution:
|
Start a Llama Stack server on localhost. Here is an example of how you can do this using the fireworks.ai distribution:
|
||||||
```
|
```
|
||||||
conda create -n stack-fireworks python=3.10
|
python -m venv stack-fireworks
|
||||||
conda activate stack-fireworks
|
source stack-fireworks/bin/activate # On Windows: stack-fireworks\Scripts\activate
|
||||||
pip install --no-cache llama-stack==0.2.2
|
pip install --no-cache llama-stack==0.2.2
|
||||||
llama stack build --template fireworks --image-type conda
|
llama stack build --distro fireworks --image-type venv
|
||||||
export FIREWORKS_API_KEY=<SOME_KEY>
|
export FIREWORKS_API_KEY=<SOME_KEY>
|
||||||
llama stack run fireworks --port 5050
|
llama stack run fireworks --port 5050
|
||||||
```
|
```
|
||||||
|
|
|
@ -57,7 +57,7 @@ Make sure you have access to a watsonx API Key. You can get one by referring [wa
|
||||||
|
|
||||||
## Running Llama Stack with watsonx
|
## Running Llama Stack with watsonx
|
||||||
|
|
||||||
You can do this via Conda (build code), venv or Docker which has a pre-built image.
|
You can do this via venv or Docker which has a pre-built image.
|
||||||
|
|
||||||
### Via Docker
|
### Via Docker
|
||||||
|
|
||||||
|
@ -76,13 +76,3 @@ docker run \
|
||||||
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
|
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
|
||||||
--env WATSONX_BASE_URL=$WATSONX_BASE_URL
|
--env WATSONX_BASE_URL=$WATSONX_BASE_URL
|
||||||
```
|
```
|
||||||
|
|
||||||
### Via Conda
|
|
||||||
|
|
||||||
```bash
|
|
||||||
llama stack build --template watsonx --image-type conda
|
|
||||||
llama stack run ./run.yaml \
|
|
||||||
--port $LLAMA_STACK_PORT \
|
|
||||||
--env WATSONX_API_KEY=$WATSONX_API_KEY \
|
|
||||||
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
|
|
||||||
```
|
|
||||||
|
|
|
@ -114,7 +114,7 @@ podman run --rm -it \
|
||||||
|
|
||||||
## Running Llama Stack
|
## Running Llama Stack
|
||||||
|
|
||||||
Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
|
Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via venv or Docker which has a pre-built image.
|
||||||
|
|
||||||
### Via Docker
|
### Via Docker
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ docker run \
|
||||||
--pull always \
|
--pull always \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v $HOME/.llama:/root/.llama \
|
-v $HOME/.llama:/root/.llama \
|
||||||
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
|
||||||
llamastack/distribution-dell \
|
llamastack/distribution-dell \
|
||||||
--config /root/my-run.yaml \
|
--config /root/my-run.yaml \
|
||||||
--port $LLAMA_STACK_PORT \
|
--port $LLAMA_STACK_PORT \
|
||||||
|
@ -164,12 +164,12 @@ docker run \
|
||||||
--env CHROMA_URL=$CHROMA_URL
|
--env CHROMA_URL=$CHROMA_URL
|
||||||
```
|
```
|
||||||
|
|
||||||
### Via Conda
|
### Via venv
|
||||||
|
|
||||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --template dell --image-type conda
|
llama stack build --distro dell --image-type venv
|
||||||
llama stack run dell
|
llama stack run dell
|
||||||
--port $LLAMA_STACK_PORT \
|
--port $LLAMA_STACK_PORT \
|
||||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
|
|
|
@ -70,7 +70,7 @@ $ llama model list --downloaded
|
||||||
|
|
||||||
## Running the Distribution
|
## Running the Distribution
|
||||||
|
|
||||||
You can do this via Conda (build code) or Docker which has a pre-built image.
|
You can do this via venv or Docker which has a pre-built image.
|
||||||
|
|
||||||
### Via Docker
|
### Via Docker
|
||||||
|
|
||||||
|
@ -104,12 +104,12 @@ docker run \
|
||||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||||
```
|
```
|
||||||
|
|
||||||
### Via Conda
|
### Via venv
|
||||||
|
|
||||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --template meta-reference-gpu --image-type conda
|
llama stack build --distro meta-reference-gpu --image-type venv
|
||||||
llama stack run distributions/meta-reference-gpu/run.yaml \
|
llama stack run distributions/meta-reference-gpu/run.yaml \
|
||||||
--port 8321 \
|
--port 8321 \
|
||||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||||
|
|
|
@ -133,7 +133,7 @@ curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-inst
|
||||||
|
|
||||||
## Running Llama Stack with NVIDIA
|
## Running Llama Stack with NVIDIA
|
||||||
|
|
||||||
You can do this via Conda or venv (build code), or Docker which has a pre-built image.
|
You can do this via venv (build code), or Docker which has a pre-built image.
|
||||||
|
|
||||||
### Via Docker
|
### Via Docker
|
||||||
|
|
||||||
|
@ -152,24 +152,13 @@ docker run \
|
||||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
||||||
```
|
```
|
||||||
|
|
||||||
### Via Conda
|
|
||||||
|
|
||||||
```bash
|
|
||||||
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
|
|
||||||
llama stack build --template nvidia --image-type conda
|
|
||||||
llama stack run ./run.yaml \
|
|
||||||
--port 8321 \
|
|
||||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
|
||||||
--env INFERENCE_MODEL=$INFERENCE_MODEL
|
|
||||||
```
|
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
||||||
If you've set up your local development environment, you can also build the image using your local virtual environment.
|
If you've set up your local development environment, you can also build the image using your local virtual environment.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
|
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
|
||||||
llama stack build --template nvidia --image-type venv
|
llama stack build --distro nvidia --image-type venv
|
||||||
llama stack run ./run.yaml \
|
llama stack run ./run.yaml \
|
||||||
--port 8321 \
|
--port 8321 \
|
||||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||||
|
|
|
@ -100,10 +100,6 @@ The following environment variables can be configured:
|
||||||
### Model Configuration
|
### Model Configuration
|
||||||
- `INFERENCE_MODEL`: HuggingFace model for serverless inference
|
- `INFERENCE_MODEL`: HuggingFace model for serverless inference
|
||||||
- `INFERENCE_ENDPOINT_NAME`: HuggingFace endpoint name
|
- `INFERENCE_ENDPOINT_NAME`: HuggingFace endpoint name
|
||||||
- `OLLAMA_INFERENCE_MODEL`: Ollama model name
|
|
||||||
- `OLLAMA_EMBEDDING_MODEL`: Ollama embedding model name
|
|
||||||
- `OLLAMA_EMBEDDING_DIMENSION`: Ollama embedding dimension (default: `384`)
|
|
||||||
- `VLLM_INFERENCE_MODEL`: vLLM model name
|
|
||||||
|
|
||||||
### Vector Database Configuration
|
### Vector Database Configuration
|
||||||
- `SQLITE_STORE_DIR`: SQLite store directory (default: `~/.llama/distributions/starter`)
|
- `SQLITE_STORE_DIR`: SQLite store directory (default: `~/.llama/distributions/starter`)
|
||||||
|
@ -127,47 +123,29 @@ The following environment variables can be configured:
|
||||||
|
|
||||||
## Enabling Providers
|
## Enabling Providers
|
||||||
|
|
||||||
You can enable specific providers by setting their provider ID to a valid value using environment variables. This is useful when you want to use certain providers or don't have the required API keys.
|
You can enable specific providers by setting appropriate environment variables. For example,
|
||||||
|
|
||||||
### Examples of Enabling Providers
|
|
||||||
|
|
||||||
#### Enable FAISS Vector Provider
|
|
||||||
```bash
|
```bash
|
||||||
export ENABLE_FAISS=faiss
|
# self-hosted
|
||||||
|
export OLLAMA_URL=http://localhost:11434 # enables the Ollama inference provider
|
||||||
|
export VLLM_URL=http://localhost:8000/v1 # enables the vLLM inference provider
|
||||||
|
export TGI_URL=http://localhost:8000/v1 # enables the TGI inference provider
|
||||||
|
|
||||||
|
# cloud-hosted requiring API key configuration on the server
|
||||||
|
export CEREBRAS_API_KEY=your_cerebras_api_key # enables the Cerebras inference provider
|
||||||
|
export NVIDIA_API_KEY=your_nvidia_api_key # enables the NVIDIA inference provider
|
||||||
|
|
||||||
|
# vector providers
|
||||||
|
export MILVUS_URL=http://localhost:19530 # enables the Milvus vector provider
|
||||||
|
export CHROMADB_URL=http://localhost:8000/v1 # enables the ChromaDB vector provider
|
||||||
|
export PGVECTOR_DB=llama_stack_db # enables the PGVector vector provider
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Enable Ollama Models
|
This distribution comes with a default "llama-guard" shield that can be enabled by setting the `SAFETY_MODEL` environment variable to point to an appropriate Llama Guard model id. Use `llama-stack-client models list` to see the list of available models.
|
||||||
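As a concrete sketch, the shield can be switched on like this (the model id is an assumption taken from elsewhere in these docs; confirm it with `llama-stack-client models list`):

```bash
# Point SAFETY_MODEL at a Llama Guard model to enable the default llama-guard shield
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

# List the models the running stack serves to confirm the id
llama-stack-client models list
```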
```bash
|
|
||||||
export ENABLE_OLLAMA=ollama
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Disable vLLM Models
|
|
||||||
```bash
|
|
||||||
export VLLM_INFERENCE_MODEL=__disabled__
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Disable Optional Vector Providers
|
|
||||||
```bash
|
|
||||||
export ENABLE_SQLITE_VEC=__disabled__
|
|
||||||
export ENABLE_CHROMADB=__disabled__
|
|
||||||
export ENABLE_PGVECTOR=__disabled__
|
|
||||||
```
|
|
||||||
|
|
||||||
### Provider ID Patterns
|
|
||||||
|
|
||||||
The starter distribution uses several patterns for provider IDs:
|
|
||||||
|
|
||||||
1. **Direct provider IDs**: `faiss`, `ollama`, `vllm`
|
|
||||||
2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`
|
|
||||||
3. **Model-based provider IDs**: `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`
|
|
||||||
|
|
||||||
When using the `+` pattern (like `${env.ENABLE_SQLITE_VEC+sqlite-vec}`), the provider is enabled by default and can be disabled by setting the environment variable to `__disabled__`.
|
|
||||||
|
|
||||||
When using the `:` pattern (like `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`), the provider is disabled by default and can be enabled by setting the environment variable to a valid value.
|
|
||||||
|
|
||||||
## Running the Distribution

-You can run the starter distribution via Docker, Conda, or venv.
+You can run the starter distribution via Docker or venv.

### Via Docker

@ -186,12 +164,12 @@ docker run \
  --port $LLAMA_STACK_PORT
```

-### Via Conda or venv
+### Via venv

Ensure you have configured the starter distribution using the environment variables explained above.

```bash
-uv run --with llama-stack llama stack build --template starter --image-type <conda|venv> --run
+uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```

## Example Usage

@ -11,12 +11,6 @@ This is the simplest way to get started. Using Llama Stack as a library means yo
Another simple way to start interacting with Llama Stack is to just spin up a container (via Docker or Podman) which is pre-built with all the providers you need. We provide a number of pre-built images so you can start a Llama Stack server instantly. You can also build your own custom container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](selection) for more details.

-## Conda:
-
-If you have a custom or an advanced setup or you are developing on Llama Stack you can also build a custom Llama Stack server. Using `llama stack build` and `llama stack run` you can build/run a custom Llama Stack server containing the exact combination of providers you wish. We have also provided various templates to make getting started easier. See [Building a Custom Distribution](building_distro) for more details.

## Kubernetes:

If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](kubernetes_deployment) for more details.

@ -59,10 +59,10 @@ Now let's build and run the Llama Stack config for Ollama.
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.

```bash
-llama stack build --template starter --image-type venv --run
+llama stack build --distro starter --image-type venv --run
```
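A hedged example of enabling Ollama through environment variables when building and running (the variable names come from the starter distribution notes above; the URL assumes a local Ollama on its default port):

```bash
ENABLE_OLLAMA=ollama \
OLLAMA_URL=http://localhost:11434 \
llama stack build --distro starter --image-type venv --run
```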
:::
-:::{tab-item} Using `conda`
+:::{tab-item} Using `venv`
You can use Python to build and run the Llama Stack server, which is useful for testing and development.

Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup,

@ -70,7 +70,7 @@ which defines the providers and their settings.
Now let's build and run the Llama Stack config for Ollama.

```bash
-llama stack build --template starter --image-type conda --run
+llama stack build --distro starter --image-type venv --run
```
:::
:::{tab-item} Using a Container

@ -150,10 +150,10 @@ pip install llama-stack-client
```
:::

-:::{tab-item} Install with `conda`
+:::{tab-item} Install with `venv`
```bash
-yes | conda create -n stack-client python=3.12
-conda activate stack-client
+python -m venv stack-client
+source stack-client/bin/activate # On Windows: stack-client\Scripts\activate
pip install llama-stack-client
```
:::

@ -16,10 +16,13 @@ as the inference [provider](../providers/inference/index) for a Llama Model.
```bash
ollama run llama3.2:3b --keepalive 60m
```

#### Step 2: Run the Llama Stack server

We will use `uv` to run the Llama Stack server.
```bash
-uv run --with llama-stack llama stack build --template starter --image-type venv --run
+OLLAMA_URL=http://localhost:11434 \
+uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
@ -1,5 +1,13 @@
-# Agents Providers
+# Agents
+
+## Overview

This section contains documentation for all available providers for the **agents** API.

-- [inline::meta-reference](inline_meta-reference.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_meta-reference
+```

@ -1,7 +1,15 @@
-# Datasetio Providers
+# Datasetio
+
+## Overview

This section contains documentation for all available providers for the **datasetio** API.

-- [inline::localfs](inline_localfs.md)
-- [remote::huggingface](remote_huggingface.md)
-- [remote::nvidia](remote_nvidia.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_localfs
+remote_huggingface
+remote_nvidia
+```

@ -1,6 +1,14 @@
-# Eval Providers
+# Eval
+
+## Overview

This section contains documentation for all available providers for the **eval** API.

-- [inline::meta-reference](inline_meta-reference.md)
-- [remote::nvidia](remote_nvidia.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_meta-reference
+remote_nvidia
+```
@ -1,9 +1,4 @@
-# External Providers Guide
+# Creating External Providers

-Llama Stack supports external providers that live outside of the main codebase. This allows you to:
-- Create and maintain your own providers independently
-- Share providers with others without contributing to the main codebase
-- Keep provider-specific code separate from the core Llama Stack code

## Configuration

@ -12,8 +7,7 @@ To enable external providers, you need to add `module` into your build yaml, all
an example entry in your build.yaml should look like:

```
-- provider_id: ramalama
-  provider_type: remote::ramalama
+- provider_type: remote::ramalama
  module: ramalama_stack
```

@ -56,17 +50,6 @@ Llama Stack supports two types of external providers:
1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
2. **Inline Providers**: Providers that run locally within the Llama Stack process

-## Known External Providers
-
-Here's a list of known external providers that you can use with Llama Stack:
-
-| Name | Description | API | Type | Repository |
-|------|-------------|-----|------|------------|
-| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
-| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
-| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
-| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |

### Remote Provider Specification

Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:

@ -120,9 +103,9 @@ container_image: custom-vector-store:latest # optional
- `provider_data_validator`: Optional validator for provider data
- `container_image`: Optional container image to use instead of pip packages

-## Required Implementation
+## Required Fields

-## All Providers
+### All Providers

All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:

@ -147,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
    )
```

-### Remote Providers
+#### Remote Providers

Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class

@ -163,7 +146,7 @@ async def get_adapter_impl(
    return OllamaInferenceAdapter(config)
```

-### Inline Providers
+#### Inline Providers

Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -190,7 +173,40 @@ Version: 0.1.0
Location: /path/to/venv/lib/python3.10/site-packages
```

-## Example using `external_providers_dir`: Custom Ollama Provider
+## Best Practices
+
+1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
+
+2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
+
+3. **Dependencies**: Only include the minimum required dependencies in your provider package.
+
+4. **Documentation**: Include clear documentation in your provider package about:
+   - Installation requirements
+   - Configuration options
+   - Usage examples
+   - Any limitations or known issues
+
+5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
+   You can refer to the [integration tests guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more information. Execute the test for the Provider type you are developing.
+
+## Troubleshooting
+
+If your external provider isn't being loaded:
+
+1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
+2. Check that the `external_providers_dir` path is correct and accessible.
+3. Verify that the YAML files are properly formatted.
+4. Ensure all required Python packages are installed.
+5. Check the Llama Stack server logs for any error messages - turn on debug logging to get more information using `LLAMA_STACK_LOGGING=all=debug` (see the sketch after this list).
+6. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
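As a quick, hedged illustration of the debug-logging tip above (the distribution name is a placeholder -- use whichever distribution you actually run):

```bash
LLAMA_STACK_LOGGING=all=debug llama stack run starter
```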
## Examples

### Example using `external_providers_dir`: Custom Ollama Provider

Here's a complete example of creating and using a custom Ollama provider:

@ -242,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
The provider will now be available in Llama Stack with the type `remote::custom_ollama`.

-## Example using `module`: ramalama-stack
+### Example using `module`: ramalama-stack

[ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.

@ -255,8 +271,7 @@ distribution_spec:
  container_image: null
  providers:
    inference:
-    - provider_id: ramalama
-      provider_type: remote::ramalama
+    - provider_type: remote::ramalama
      module: ramalama_stack==0.3.0a0
image_type: venv
image_name: null

@ -268,35 +283,4 @@ additional_pip_packages:
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.

The provider will now be available in Llama Stack with the type `remote::ramalama`.

-## Best Practices
-
-1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
-2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
-3. **Dependencies**: Only include the minimum required dependencies in your provider package.
-4. **Documentation**: Include clear documentation in your provider package about:
-   - Installation requirements
-   - Configuration options
-   - Usage examples
-   - Any limitations or known issues
-5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack. You can refer to the [integration tests guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more information. Execute the test for the Provider type you are developing.
-
-## Troubleshooting
-
-If your external provider isn't being loaded:
-
-1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
-1. Check that the `external_providers_dir` path is correct and accessible.
-2. Verify that the YAML files are properly formatted.
-3. Ensure all required Python packages are installed.
-4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more information using `LLAMA_STACK_LOGGING=all=debug`.
-5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.

10 docs/source/providers/external/external-providers-list.md vendored Normal file
@ -0,0 +1,10 @@
# Known External Providers

Here's a list of known external providers that you can use with Llama Stack:

| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |

13 docs/source/providers/external/index.md vendored Normal file
@ -0,0 +1,13 @@
# External Providers

Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code

```{toctree}
:maxdepth: 1

external-providers-list
external-providers-guide
```
@ -1,5 +1,13 @@
-# Files Providers
+# Files
+
+## Overview

This section contains documentation for all available providers for the **files** API.

-- [inline::localfs](inline_localfs.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_localfs
+```
@ -1,4 +1,4 @@
-# API Providers Overview
+# API Providers

The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),

@ -12,81 +12,17 @@ Providers come in two flavors:

Importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally.

-## External Providers
-Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently.
-
-```{toctree}
-:maxdepth: 1
-
-external.md
-```
-
-```{include} openai.md
-:start-after: ## OpenAI API Compatibility
-```
-
-## Inference
-Runs inference with an LLM.
-
```{toctree}
:maxdepth: 1

+external/index
+openai
inference/index
-```
-
-## Agents
-Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.
-
-```{toctree}
-:maxdepth: 1
-
agents/index
-```
-
-## DatasetIO
-Interfaces with datasets and data loaders.
-
-```{toctree}
-:maxdepth: 1
-
datasetio/index
-```
-
-## Safety
-Applies safety policies to the output at a Systems (not only model) level.
-
-```{toctree}
-:maxdepth: 1
-
safety/index
-```
-
-## Telemetry
-Collects telemetry data from the system.
-
-```{toctree}
-:maxdepth: 1
-
telemetry/index
-```
-
-## Vector IO
-
-Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents.
-Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector
-io and database are used to store and retrieve documents for retrieval.
-
-```{toctree}
-:maxdepth: 1
-
vector_io/index
-```
-
-## Tool Runtime
-Is associated with the ToolGroup resources.
-
-```{toctree}
-:maxdepth: 1
-
tool_runtime/index
-```
+files/index
+```
@ -1,26 +1,34 @@
-# Inference Providers
+# Inference
+
+## Overview

This section contains documentation for all available providers for the **inference** API.

-- [inline::meta-reference](inline_meta-reference.md)
-- [inline::sentence-transformers](inline_sentence-transformers.md)
-- [remote::anthropic](remote_anthropic.md)
-- [remote::bedrock](remote_bedrock.md)
-- [remote::cerebras](remote_cerebras.md)
-- [remote::databricks](remote_databricks.md)
-- [remote::fireworks](remote_fireworks.md)
-- [remote::gemini](remote_gemini.md)
-- [remote::groq](remote_groq.md)
-- [remote::hf::endpoint](remote_hf_endpoint.md)
-- [remote::hf::serverless](remote_hf_serverless.md)
-- [remote::llama-openai-compat](remote_llama-openai-compat.md)
-- [remote::nvidia](remote_nvidia.md)
-- [remote::ollama](remote_ollama.md)
-- [remote::openai](remote_openai.md)
-- [remote::passthrough](remote_passthrough.md)
-- [remote::runpod](remote_runpod.md)
-- [remote::sambanova](remote_sambanova.md)
-- [remote::tgi](remote_tgi.md)
-- [remote::together](remote_together.md)
-- [remote::vllm](remote_vllm.md)
-- [remote::watsonx](remote_watsonx.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_meta-reference
+inline_sentence-transformers
+remote_anthropic
+remote_bedrock
+remote_cerebras
+remote_databricks
+remote_fireworks
+remote_gemini
+remote_groq
+remote_hf_endpoint
+remote_hf_serverless
+remote_llama-openai-compat
+remote_nvidia
+remote_ollama
+remote_openai
+remote_passthrough
+remote_runpod
+remote_sambanova
+remote_tgi
+remote_together
+remote_vllm
+remote_watsonx
+```
@ -1,21 +0,0 @@
-# remote::cerebras-openai-compat
-
-## Description
-
-Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.
-
-## Configuration
-
-| Field | Type | Required | Default | Description |
-|-------|------|----------|---------|-------------|
-| `api_key` | `str \| None` | No | | The Cerebras API key |
-| `openai_compat_api_base` | `<class 'str'>` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server |
-
-## Sample Configuration
-
-```yaml
-openai_compat_api_base: https://api.cerebras.ai/v1
-api_key: ${env.CEREBRAS_API_KEY}
-```

@ -1,21 +0,0 @@
-# remote::fireworks-openai-compat
-
-## Description
-
-Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.
-
-## Configuration
-
-| Field | Type | Required | Default | Description |
-|-------|------|----------|---------|-------------|
-| `api_key` | `str \| None` | No | | The Fireworks API key |
-| `openai_compat_api_base` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server |
-
-## Sample Configuration
-
-```yaml
-openai_compat_api_base: https://api.fireworks.ai/inference/v1
-api_key: ${env.FIREWORKS_API_KEY}
-```

@ -1,21 +0,0 @@
-# remote::groq-openai-compat
-
-## Description
-
-Groq OpenAI-compatible provider for using Groq models with OpenAI API format.
-
-## Configuration
-
-| Field | Type | Required | Default | Description |
-|-------|------|----------|---------|-------------|
-| `api_key` | `str \| None` | No | | The Groq API key |
-| `openai_compat_api_base` | `<class 'str'>` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server |
-
-## Sample Configuration
-
-```yaml
-openai_compat_api_base: https://api.groq.com/openai/v1
-api_key: ${env.GROQ_API_KEY}
-```
@ -9,11 +9,13 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | API key for OpenAI models |
+| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

## Sample Configuration

```yaml
api_key: ${env.OPENAI_API_KEY:=}
+base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
```

@ -1,21 +0,0 @@
-# remote::together-openai-compat
-
-## Description
-
-Together AI OpenAI-compatible provider for using Together models with OpenAI API format.
-
-## Configuration
-
-| Field | Type | Required | Default | Description |
-|-------|------|----------|---------|-------------|
-| `api_key` | `str \| None` | No | | The Together API key |
-| `openai_compat_api_base` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together API server |
-
-## Sample Configuration
-
-```yaml
-openai_compat_api_base: https://api.together.xyz/v1
-api_key: ${env.TOGETHER_API_KEY}
-```
@ -1,7 +1,15 @@
-# Post_Training Providers
+# Post_Training
+
+## Overview

This section contains documentation for all available providers for the **post_training** API.

-- [inline::huggingface](inline_huggingface.md)
-- [inline::torchtune](inline_torchtune.md)
-- [remote::nvidia](remote_nvidia.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_huggingface
+inline_torchtune
+remote_nvidia
+```
@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
+| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
+| `use_reference_model` | `<class 'bool'>` | No | True | |
+| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair']` | No | sigmoid | |
+| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |

## Sample Configuration
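For orientation, a hedged sketch of how the newly added DPO fields might be set in a provider config, using the defaults from the table above (illustrative only, not the distribution's actual sample configuration):

```yaml
dpo_beta: 0.1
use_reference_model: true
dpo_loss_type: sigmoid  # one of: sigmoid, hinge, ipo, kto_pair
dpo_output_dir: ./checkpoints/dpo
```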
@ -1,10 +1,18 @@
-# Safety Providers
+# Safety
+
+## Overview

This section contains documentation for all available providers for the **safety** API.

-- [inline::code-scanner](inline_code-scanner.md)
-- [inline::llama-guard](inline_llama-guard.md)
-- [inline::prompt-guard](inline_prompt-guard.md)
-- [remote::bedrock](remote_bedrock.md)
-- [remote::nvidia](remote_nvidia.md)
-- [remote::sambanova](remote_sambanova.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_code-scanner
+inline_llama-guard
+inline_prompt-guard
+remote_bedrock
+remote_nvidia
+remote_sambanova
+```
@ -1,7 +1,15 @@
-# Scoring Providers
+# Scoring
+
+## Overview

This section contains documentation for all available providers for the **scoring** API.

-- [inline::basic](inline_basic.md)
-- [inline::braintrust](inline_braintrust.md)
-- [inline::llm-as-judge](inline_llm-as-judge.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_basic
+inline_braintrust
+inline_llm-as-judge
+```

@ -1,5 +1,13 @@
-# Telemetry Providers
+# Telemetry
+
+## Overview

This section contains documentation for all available providers for the **telemetry** API.

-- [inline::meta-reference](inline_meta-reference.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_meta-reference
+```
@ -1,10 +1,18 @@
-# Tool_Runtime Providers
+# Tool_Runtime
+
+## Overview

This section contains documentation for all available providers for the **tool_runtime** API.

-- [inline::rag-runtime](inline_rag-runtime.md)
-- [remote::bing-search](remote_bing-search.md)
-- [remote::brave-search](remote_brave-search.md)
-- [remote::model-context-protocol](remote_model-context-protocol.md)
-- [remote::tavily-search](remote_tavily-search.md)
-- [remote::wolfram-alpha](remote_wolfram-alpha.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_rag-runtime
+remote_bing-search
+remote_brave-search
+remote_model-context-protocol
+remote_tavily-search
+remote_wolfram-alpha
+```

@ -1,16 +1,24 @@
-# Vector_Io Providers
+# Vector_Io
+
+## Overview

This section contains documentation for all available providers for the **vector_io** API.

-- [inline::chromadb](inline_chromadb.md)
-- [inline::faiss](inline_faiss.md)
-- [inline::meta-reference](inline_meta-reference.md)
-- [inline::milvus](inline_milvus.md)
-- [inline::qdrant](inline_qdrant.md)
-- [inline::sqlite-vec](inline_sqlite-vec.md)
-- [inline::sqlite_vec](inline_sqlite_vec.md)
-- [remote::chromadb](remote_chromadb.md)
-- [remote::milvus](remote_milvus.md)
-- [remote::pgvector](remote_pgvector.md)
-- [remote::qdrant](remote_qdrant.md)
-- [remote::weaviate](remote_weaviate.md)
+## Providers
+
+```{toctree}
+:maxdepth: 1
+
+inline_chromadb
+inline_faiss
+inline_meta-reference
+inline_milvus
+inline_qdrant
+inline_sqlite-vec
+inline_sqlite_vec
+remote_chromadb
+remote_milvus
+remote_pgvector
+remote_qdrant
+remote_weaviate
+```
@ -51,11 +51,15 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `path` | `<class 'str'>` | No | PydanticUndefined | |
+| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
+kvstore:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
```

@ -20,11 +20,15 @@ Please refer to the inline provider documentation.
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |
+| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |

## Sample Configuration

```yaml
-api_key: ${env.QDRANT_API_KEY}
+api_key: ${env.QDRANT_API_KEY:=}
+kvstore:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
```
@ -33,9 +33,19 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.

+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
+| `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
+| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |

## Sample Configuration

```yaml
+weaviate_api_key: null
+weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
kvstore:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db
@ -366,7 +366,7 @@ The purpose of scoring function is to calculate the score for each example based
Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.

### Add new benchmark into template
-Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/open-benchmark/run.yaml)
+Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml)

Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
- `benchmark_id`: identifier of the benchmark
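A hypothetical sketch of what such a `benchmarks` entry could look like in the run.yaml (field names other than `benchmark_id` are assumptions for illustration; follow the full field list described in the template itself):

```yaml
benchmarks:
  - benchmark_id: my-new-benchmark      # identifier of the benchmark
    dataset_id: my-benchmark-dataset    # assumed field: the dataset registered above
    scoring_functions: ["<scoring-function-id>"]  # assumed field
```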
@ -378,7 +378,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar

Spin up the llama stack server with the 'open-benchmark' template:
```
-llama stack run llama_stack/templates/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/run.yaml
```
@ -19,11 +19,11 @@ You have two ways to install Llama Stack:
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git

-conda create -n myenv python=3.10
-conda activate myenv
+python -m venv myenv
+source myenv/bin/activate # On Windows: myenv\Scripts\activate

cd llama-stack
-$CONDA_PREFIX/bin/pip install -e .
+pip install -e .

## Downloading models via CLI

@ -19,11 +19,11 @@ You have two ways to install Llama Stack:
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git

-conda create -n myenv python=3.10
-conda activate myenv
+python -m venv myenv
+source myenv/bin/activate # On Windows: myenv\Scripts\activate

cd llama-stack
-$CONDA_PREFIX/bin/pip install -e .
+pip install -e .

## `llama` subcommands
@ -66,7 +66,7 @@
"from pydantic import BaseModel\n",
"from termcolor import cprint\n",
"\n",
-"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
+"from llama_stack.core.datatypes import RemoteProviderConfig\n",
"from llama_stack.apis.safety import Safety\n",
"from llama_stack_client import LlamaStackClient\n",
"\n",
@ -47,20 +47,20 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next

## Install Dependencies and Set Up Environment

-1. **Create a Conda Environment**:
-   Create a new Conda environment with Python 3.12:
-   ```bash
-   conda create -n ollama python=3.12
-   ```
-   Activate the environment:
-   ```bash
-   conda activate ollama
-   ```
+1. **Install uv**:
+   Install [uv](https://docs.astral.sh/uv/) for managing dependencies:
+   ```bash
+   # macOS and Linux
+   curl -LsSf https://astral.sh/uv/install.sh | sh
+
+   # Windows
+   powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
+   ```

2. **Install ChromaDB**:
-   Install `chromadb` using `pip`:
+   Install `chromadb` using `uv`:
   ```bash
-   pip install chromadb
+   uv pip install chromadb
   ```

3. **Run ChromaDB**:
@ -69,28 +69,21 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
   chroma run --host localhost --port 8000 --path ./my_chroma_data
   ```

-4. **Install Llama Stack**:
-   Open a new terminal and install `llama-stack`:
-   ```bash
-   conda activate ollama
-   pip install -U llama-stack
-   ```
-
---

## Build, Configure, and Run Llama Stack

1. **Build the Llama Stack**:
-   Build the Llama Stack using the `ollama` template:
+   Build the Llama Stack using the `starter` template:
   ```bash
-   llama stack build --template starter --image-type conda
+   uv run --with llama-stack llama stack build --distro starter --image-type venv
   ```
   **Expected Output:**
   ```bash
   ...
   Build Successful!
-   You can find the newly-built template here: ~/.llama/distributions/ollama/ollama-run.yaml
-   You can run the new Llama Stack Distro via: llama stack run ~/.llama/distributions/ollama/ollama-run.yaml --image-type conda
+   You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
+   You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
   ```

3. **Set the ENV variables by exporting them to the terminal**:
|
||||||
```
|
```
|
||||||
|
|
||||||
3. **Run the Llama Stack**:
|
3. **Run the Llama Stack**:
|
||||||
Run the stack with command shared by the API from earlier:
|
Run the stack using uv:
|
||||||
```bash
|
```bash
|
||||||
llama stack run ollama
|
uv run --with llama-stack llama stack run starter \
|
||||||
--port $LLAMA_STACK_PORT
|
--image-type venv \
|
||||||
--env INFERENCE_MODEL=$INFERENCE_MODEL
|
--port $LLAMA_STACK_PORT \
|
||||||
--env SAFETY_MODEL=$SAFETY_MODEL
|
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
|
--env SAFETY_MODEL=$SAFETY_MODEL \
|
||||||
--env OLLAMA_URL=$OLLAMA_URL
|
--env OLLAMA_URL=$OLLAMA_URL
|
||||||
```
|
```
|
||||||
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||||
|
@ -120,7 +114,7 @@ After setting up the server, open a new terminal window and configure the llama-
|
||||||
|
|
||||||
1. Configure the CLI to point to the llama-stack server.
|
1. Configure the CLI to point to the llama-stack server.
|
||||||
```bash
|
```bash
|
||||||
llama-stack-client configure --endpoint http://localhost:8321
|
uv run --with llama-stack-client llama-stack-client configure --endpoint http://localhost:8321
|
||||||
```
|
```
|
||||||
**Expected Output:**
|
**Expected Output:**
|
||||||
```bash
|
```bash
|
||||||
|
@ -128,7 +122,7 @@ After setting up the server, open a new terminal window and configure the llama-
|
||||||
```
|
```
|
||||||
2. Test the CLI by running inference:
|
2. Test the CLI by running inference:
|
||||||
```bash
|
```bash
|
||||||
llama-stack-client inference chat-completion --message "Write me a 2-sentence poem about the moon"
|
uv run --with llama-stack-client llama-stack-client inference chat-completion --message "Write me a 2-sentence poem about the moon"
|
||||||
```
|
```
|
||||||
**Expected Output:**
|
**Expected Output:**
|
||||||
```bash
|
```bash
|
||||||
|
@ -170,7 +164,7 @@ curl http://localhost:$LLAMA_STACK_PORT/alpha/inference/chat-completion
EOF
```

-You can check the available models with the command `llama-stack-client models list`.
+You can check the available models with the command `uv run --with llama-stack-client llama-stack-client models list`.

**Expected Output:**
```json
@ -191,18 +185,12 @@ You can check the available models with the command `llama-stack-client models l

You can also interact with the Llama Stack server using a simple Python script. Below is an example:

-### 1. Activate Conda Environment
-
-```bash
-conda activate ollama
-```
-
-### 2. Create Python Script (`test_llama_stack.py`)
+### 1. Create Python Script (`test_llama_stack.py`)

```bash
touch test_llama_stack.py
```

-### 3. Create a Chat Completion Request in Python
+### 2. Create a Chat Completion Request in Python

In `test_llama_stack.py`, write the following code:
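For orientation, a minimal sketch of such a chat completion request (the model id and port below are assumptions -- substitute the `INFERENCE_MODEL` and `LLAMA_STACK_PORT` values you exported earlier):

```python
import os

from llama_stack_client import LlamaStackClient

# Assumptions: server on localhost:8321 and a Llama 3.2 3B Instruct model.
client = LlamaStackClient(
    base_url=f"http://localhost:{os.environ.get('LLAMA_STACK_PORT', 8321)}"
)

response = client.inference.chat_completion(
    model_id=os.environ.get("INFERENCE_MODEL", "meta-llama/Llama-3.2-3B-Instruct"),
    messages=[
        {"role": "system", "content": "You are a friendly assistant."},
        {"role": "user", "content": "Write a two-sentence poem about llamas."},
    ],
)
print(response.completion_message.content)
```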
@ -233,10 +221,10 @@ response = client.inference.chat_completion(
print(response.completion_message.content)
```

-### 4. Run the Python Script
+### 3. Run the Python Script

```bash
-python test_llama_stack.py
+uv run --with llama-stack-client python test_llama_stack.py
```

**Expected Output:**
@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

-from llama_stack.distribution.library_client import (  # noqa: F401
+from llama_stack.core.library_client import (  # noqa: F401
    AsyncLlamaStackAsLibraryClient,
    LlamaStackAsLibraryClient,
)
@ -152,7 +152,17 @@ Step = Annotated[

@json_schema_type
class Turn(BaseModel):
-    """A single turn in an interaction with an Agentic System."""
+    """A single turn in an interaction with an Agentic System.
+
+    :param turn_id: Unique identifier for the turn within a session
+    :param session_id: Unique identifier for the conversation session
+    :param input_messages: List of messages that initiated this turn
+    :param steps: Ordered list of processing steps executed during this turn
+    :param output_message: The model's generated response containing content and metadata
+    :param output_attachments: (Optional) Files or media attached to the agent's response
+    :param started_at: Timestamp when the turn began
+    :param completed_at: (Optional) Timestamp when the turn finished, if completed
+    """

    turn_id: str
    session_id: str

@ -167,7 +177,13 @@ class Turn(BaseModel):

@json_schema_type
class Session(BaseModel):
-    """A single session of an interaction with an Agentic System."""
+    """A single session of an interaction with an Agentic System.
+
+    :param session_id: Unique identifier for the conversation session
+    :param session_name: Human-readable name for the session
+    :param turns: List of all turns that have occurred in this session
+    :param started_at: Timestamp when the session was created
+    """

    session_id: str
    session_name: str
@ -232,6 +248,13 @@ class AgentConfig(AgentConfigCommon):

@json_schema_type
class Agent(BaseModel):
+    """An agent instance with configuration and metadata.
+
+    :param agent_id: Unique identifier for the agent
+    :param agent_config: Configuration settings for the agent
+    :param created_at: Timestamp when the agent was created
+    """
+
    agent_id: str
    agent_config: AgentConfig
    created_at: datetime

@ -253,6 +276,14 @@ class AgentTurnResponseEventType(StrEnum):

@json_schema_type
class AgentTurnResponseStepStartPayload(BaseModel):
+    """Payload for step start events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param metadata: (Optional) Additional metadata for the step
+    """
+
    event_type: Literal[AgentTurnResponseEventType.step_start] = AgentTurnResponseEventType.step_start
    step_type: StepType
    step_id: str

@ -261,6 +292,14 @@ class AgentTurnResponseStepStartPayload(BaseModel):

@json_schema_type
class AgentTurnResponseStepCompletePayload(BaseModel):
+    """Payload for step completion events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param step_details: Complete details of the executed step
+    """
+
    event_type: Literal[AgentTurnResponseEventType.step_complete] = AgentTurnResponseEventType.step_complete
    step_type: StepType
    step_id: str

@ -269,6 +308,14 @@ class AgentTurnResponseStepCompletePayload(BaseModel):

@json_schema_type
class AgentTurnResponseStepProgressPayload(BaseModel):
+    """Payload for step progress events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param step_type: Type of step being executed
+    :param step_id: Unique identifier for the step within a turn
+    :param delta: Incremental content changes during step execution
+    """
+
    model_config = ConfigDict(protected_namespaces=())

    event_type: Literal[AgentTurnResponseEventType.step_progress] = AgentTurnResponseEventType.step_progress
@ -280,18 +327,36 @@ class AgentTurnResponseStepProgressPayload(BaseModel):

@json_schema_type
class AgentTurnResponseTurnStartPayload(BaseModel):
+    """Payload for turn start events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn_id: Unique identifier for the turn within a session
+    """
+
    event_type: Literal[AgentTurnResponseEventType.turn_start] = AgentTurnResponseEventType.turn_start
    turn_id: str


@json_schema_type
class AgentTurnResponseTurnCompletePayload(BaseModel):
+    """Payload for turn completion events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn: Complete turn data including all steps and results
+    """
+
    event_type: Literal[AgentTurnResponseEventType.turn_complete] = AgentTurnResponseEventType.turn_complete
    turn: Turn


@json_schema_type
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
+    """Payload for turn awaiting input events in agent turn responses.
+
+    :param event_type: Type of event being reported
+    :param turn: Turn data when waiting for external tool responses
+    """
+
    event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input] = AgentTurnResponseEventType.turn_awaiting_input
    turn: Turn
@ -310,21 +375,47 @@ register_schema(AgentTurnResponseEventPayload, name="AgentTurnResponseEventPaylo
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentTurnResponseEvent(BaseModel):
|
class AgentTurnResponseEvent(BaseModel):
|
||||||
|
"""An event in an agent turn response stream.
|
||||||
|
|
||||||
|
:param payload: Event-specific payload containing event data
|
||||||
|
"""
|
||||||
|
|
||||||
payload: AgentTurnResponseEventPayload
|
payload: AgentTurnResponseEventPayload
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentCreateResponse(BaseModel):
|
class AgentCreateResponse(BaseModel):
|
||||||
|
"""Response returned when creating a new agent.
|
||||||
|
|
||||||
|
:param agent_id: Unique identifier for the created agent
|
||||||
|
"""
|
||||||
|
|
||||||
agent_id: str
|
agent_id: str
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentSessionCreateResponse(BaseModel):
|
class AgentSessionCreateResponse(BaseModel):
|
||||||
|
"""Response returned when creating a new agent session.
|
||||||
|
|
||||||
|
:param session_id: Unique identifier for the created session
|
||||||
|
"""
|
||||||
|
|
||||||
session_id: str
|
session_id: str
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||||
|
"""Request to create a new turn for an agent.
|
||||||
|
|
||||||
|
:param agent_id: Unique identifier for the agent
|
||||||
|
:param session_id: Unique identifier for the conversation session
|
||||||
|
:param messages: List of messages to start the turn with
|
||||||
|
:param documents: (Optional) List of documents to provide to the agent
|
||||||
|
:param toolgroups: (Optional) List of tool groups to make available for this turn
|
||||||
|
:param stream: (Optional) Whether to stream the response
|
||||||
|
:param tool_config: (Optional) Tool configuration to override agent defaults
|
||||||
|
"""
|
||||||
|
|
||||||
agent_id: str
|
agent_id: str
|
||||||
session_id: str
|
session_id: str
|
||||||
|
|
||||||
|
@ -342,6 +433,15 @@ class AgentTurnCreateRequest(AgentConfigOverridablePerTurn):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentTurnResumeRequest(BaseModel):
|
class AgentTurnResumeRequest(BaseModel):
|
||||||
|
"""Request to resume an agent turn with tool responses.
|
||||||
|
|
||||||
|
:param agent_id: Unique identifier for the agent
|
||||||
|
:param session_id: Unique identifier for the conversation session
|
||||||
|
:param turn_id: Unique identifier for the turn within a session
|
||||||
|
:param tool_responses: List of tool responses to submit to continue the turn
|
||||||
|
:param stream: (Optional) Whether to stream the response
|
||||||
|
"""
|
||||||
|
|
||||||
agent_id: str
|
agent_id: str
|
||||||
session_id: str
|
session_id: str
|
||||||
turn_id: str
|
turn_id: str
|
||||||
|
@ -351,13 +451,21 @@ class AgentTurnResumeRequest(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentTurnResponseStreamChunk(BaseModel):
|
class AgentTurnResponseStreamChunk(BaseModel):
|
||||||
"""streamed agent turn completion response."""
|
"""Streamed agent turn completion response.
|
||||||
|
|
||||||
|
:param event: Individual event in the agent turn response stream
|
||||||
|
"""
|
||||||
|
|
||||||
event: AgentTurnResponseEvent
|
event: AgentTurnResponseEvent
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AgentStepResponse(BaseModel):
|
class AgentStepResponse(BaseModel):
|
||||||
|
"""Response containing details of a specific agent step.
|
||||||
|
|
||||||
|
:param step: The complete step data and execution details
|
||||||
|
"""
|
||||||
|
|
||||||
step: Step
|
step: Step
|
||||||
|
|
||||||
|
|
||||||
|
|
|
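A minimal consumer sketch for the payload classes above. The import path (llama_stack.apis.agents) is an assumption, and the loop that actually produces stream chunks is omitted; this only illustrates dispatching on the payload types defined in this diff.

# Illustrative sketch only; not part of this diff.
from llama_stack.apis.agents import (
    AgentTurnResponseStepProgressPayload,
    AgentTurnResponseStreamChunk,
    AgentTurnResponseTurnCompletePayload,
)


def handle_chunk(chunk: AgentTurnResponseStreamChunk) -> None:
    # Each streamed chunk wraps one AgentTurnResponseEvent; dispatch on its payload type.
    payload = chunk.event.payload
    if isinstance(payload, AgentTurnResponseStepProgressPayload):
        # Incremental delta for the step identified by step_id.
        print(payload.step_id, payload.delta)
    elif isinstance(payload, AgentTurnResponseTurnCompletePayload):
        # The completed turn, including all executed steps.
        print("turn complete:", payload.turn)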
@@ -18,18 +18,37 @@ from llama_stack.schema_utils import json_schema_type, register_schema

@json_schema_type
class OpenAIResponseError(BaseModel):
    """Error details for failed OpenAI response requests.

    :param code: Error code identifying the type of failure
    :param message: Human-readable error message describing the failure
    """

    code: str
    message: str


@json_schema_type
class OpenAIResponseInputMessageContentText(BaseModel):
    """Text content for input messages in OpenAI response format.

    :param text: The text content of the input message
    :param type: Content type identifier, always "input_text"
    """

    text: str
    type: Literal["input_text"] = "input_text"


@json_schema_type
class OpenAIResponseInputMessageContentImage(BaseModel):
    """Image content for input messages in OpenAI response format.

    :param detail: Level of detail for image processing, can be "low", "high", or "auto"
    :param type: Content type identifier, always "input_image"
    :param image_url: (Optional) URL of the image content
    """

    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
    type: Literal["input_image"] = "input_image"
    # TODO: handle file_id

@@ -46,6 +65,14 @@ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")

@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
    """File citation annotation for referencing specific files in response content.

    :param type: Annotation type identifier, always "file_citation"
    :param file_id: Unique identifier of the referenced file
    :param filename: Name of the referenced file
    :param index: Position index of the citation within the content
    """

    type: Literal["file_citation"] = "file_citation"
    file_id: str
    filename: str

@@ -54,6 +81,15 @@ class OpenAIResponseAnnotationFileCitation(BaseModel):

@json_schema_type
class OpenAIResponseAnnotationCitation(BaseModel):
    """URL citation annotation for referencing external web resources.

    :param type: Annotation type identifier, always "url_citation"
    :param end_index: End position of the citation span in the content
    :param start_index: Start position of the citation span in the content
    :param title: Title of the referenced web resource
    :param url: URL of the referenced web resource
    """

    type: Literal["url_citation"] = "url_citation"
    end_index: int
    start_index: int

@@ -122,6 +158,13 @@ class OpenAIResponseMessage(BaseModel):

@json_schema_type
class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):
    """Web search tool call output message for OpenAI responses.

    :param id: Unique identifier for this tool call
    :param status: Current status of the web search operation
    :param type: Tool call type identifier, always "web_search_call"
    """

    id: str
    status: str
    type: Literal["web_search_call"] = "web_search_call"

@@ -129,6 +172,15 @@ class OpenAIResponseOutputMessageWebSearchToolCall(BaseModel):

@json_schema_type
class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
    """File search tool call output message for OpenAI responses.

    :param id: Unique identifier for this tool call
    :param queries: List of search queries executed
    :param status: Current status of the file search operation
    :param type: Tool call type identifier, always "file_search_call"
    :param results: (Optional) Search results returned by the file search operation
    """

    id: str
    queries: list[str]
    status: str

@@ -138,6 +190,16 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):

@json_schema_type
class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):
    """Function tool call output message for OpenAI responses.

    :param call_id: Unique identifier for the function call
    :param name: Name of the function being called
    :param arguments: JSON string containing the function arguments
    :param type: Tool call type identifier, always "function_call"
    :param id: (Optional) Additional identifier for the tool call
    :param status: (Optional) Current status of the function call execution
    """

    call_id: str
    name: str
    arguments: str

@@ -148,6 +210,17 @@ class OpenAIResponseOutputMessageFunctionToolCall(BaseModel):

@json_schema_type
class OpenAIResponseOutputMessageMCPCall(BaseModel):
    """Model Context Protocol (MCP) call output message for OpenAI responses.

    :param id: Unique identifier for this MCP call
    :param type: Tool call type identifier, always "mcp_call"
    :param arguments: JSON string containing the MCP call arguments
    :param name: Name of the MCP method being called
    :param server_label: Label identifying the MCP server handling the call
    :param error: (Optional) Error message if the MCP call failed
    :param output: (Optional) Output result from the successful MCP call
    """

    id: str
    type: Literal["mcp_call"] = "mcp_call"
    arguments: str

@@ -158,6 +231,13 @@ class OpenAIResponseOutputMessageMCPCall(BaseModel):

class MCPListToolsTool(BaseModel):
    """Tool definition returned by MCP list tools operation.

    :param input_schema: JSON schema defining the tool's input parameters
    :param name: Name of the tool
    :param description: (Optional) Description of what the tool does
    """

    input_schema: dict[str, Any]
    name: str
    description: str | None = None

@@ -165,6 +245,14 @@ class MCPListToolsTool(BaseModel):

@json_schema_type
class OpenAIResponseOutputMessageMCPListTools(BaseModel):
    """MCP list tools output message containing available tools from an MCP server.

    :param id: Unique identifier for this MCP list tools operation
    :param type: Tool call type identifier, always "mcp_list_tools"
    :param server_label: Label identifying the MCP server providing the tools
    :param tools: List of available tools provided by the MCP server
    """

    id: str
    type: Literal["mcp_list_tools"] = "mcp_list_tools"
    server_label: str
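A small construction sketch for the content and tool-call models above. The import path is an assumption based on this file's module; the field values are placeholders.

# Illustrative sketch only; not part of this diff.
from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContentText,
    OpenAIResponseOutputMessageFunctionToolCall,
)

text_part = OpenAIResponseInputMessageContentText(text="What is the weather in Paris?")

tool_call = OpenAIResponseOutputMessageFunctionToolCall(
    call_id="call_123",
    name="get_weather",
    arguments='{"city": "Paris"}',
)

# The Literal defaults ("input_text", "function_call") act as discriminators when
# these models are serialized into the response schema.
print(text_part.model_dump()["type"], tool_call.model_dump()["type"])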
@@ -206,11 +294,34 @@ class OpenAIResponseTextFormat(TypedDict, total=False):

@json_schema_type
class OpenAIResponseText(BaseModel):
    """Text response configuration for OpenAI responses.

    :param format: (Optional) Text format configuration specifying output format requirements
    """

    format: OpenAIResponseTextFormat | None = None


@json_schema_type
class OpenAIResponseObject(BaseModel):
    """Complete OpenAI response object containing generation results and metadata.

    :param created_at: Unix timestamp when the response was created
    :param error: (Optional) Error details if the response generation failed
    :param id: Unique identifier for this response
    :param model: Model identifier used for generation
    :param object: Object type identifier, always "response"
    :param output: List of generated output items (messages, tool calls, etc.)
    :param parallel_tool_calls: Whether tool calls can be executed in parallel
    :param previous_response_id: (Optional) ID of the previous response in a conversation
    :param status: Current status of the response generation
    :param temperature: (Optional) Sampling temperature used for generation
    :param text: Text formatting configuration for the response
    :param top_p: (Optional) Nucleus sampling parameter used for generation
    :param truncation: (Optional) Truncation strategy applied to the response
    :param user: (Optional) User identifier associated with the request
    """

    created_at: int
    error: OpenAIResponseError | None = None
    id: str

@@ -231,6 +342,13 @@ class OpenAIResponseObject(BaseModel):

@json_schema_type
class OpenAIDeleteResponseObject(BaseModel):
    """Response object confirming deletion of an OpenAI response.

    :param id: Unique identifier of the deleted response
    :param object: Object type identifier, always "response"
    :param deleted: Deletion confirmation flag, always True
    """

    id: str
    object: Literal["response"] = "response"
    deleted: bool = True
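A brief consumer sketch for OpenAIResponseObject; `resp` is assumed to be an instance produced elsewhere, and only fields documented in the docstring above are read.

# Illustrative sketch only; not part of this diff.
def summarize(resp) -> str:
    # resp is an OpenAIResponseObject as defined above.
    if resp.error is not None:
        return f"response {resp.id} failed: {resp.error.code}: {resp.error.message}"
    return f"response {resp.id} ({resp.status}) has {len(resp.output)} output item(s)"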
@@ -238,18 +356,39 @@ class OpenAIDeleteResponseObject(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseCreated(BaseModel):
    """Streaming event indicating a new response has been created.

    :param response: The newly created response object
    :param type: Event type identifier, always "response.created"
    """

    response: OpenAIResponseObject
    type: Literal["response.created"] = "response.created"


@json_schema_type
class OpenAIResponseObjectStreamResponseCompleted(BaseModel):
    """Streaming event indicating a response has been completed.

    :param response: The completed response object
    :param type: Event type identifier, always "response.completed"
    """

    response: OpenAIResponseObject
    type: Literal["response.completed"] = "response.completed"


@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):
    """Streaming event for when a new output item is added to the response.

    :param response_id: Unique identifier of the response containing this output
    :param item: The output item that was added (message, tool call, etc.)
    :param output_index: Index position of this item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.output_item.added"
    """

    response_id: str
    item: OpenAIResponseOutput
    output_index: int

@@ -259,6 +398,15 @@ class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):
    """Streaming event for when an output item is completed.

    :param response_id: Unique identifier of the response containing this output
    :param item: The completed output item (message, tool call, etc.)
    :param output_index: Index position of this item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.output_item.done"
    """

    response_id: str
    item: OpenAIResponseOutput
    output_index: int

@@ -268,6 +416,16 @@ class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):
    """Streaming event for incremental text content updates.

    :param content_index: Index position within the text content
    :param delta: Incremental text content being added
    :param item_id: Unique identifier of the output item being updated
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.output_text.delta"
    """

    content_index: int
    delta: str
    item_id: str

@@ -278,6 +436,16 @@ class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):
    """Streaming event for when text output is completed.

    :param content_index: Index position within the text content
    :param text: Final complete text content of the output item
    :param item_id: Unique identifier of the completed output item
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.output_text.done"
    """

    content_index: int
    text: str  # final text of the output item
    item_id: str

@@ -288,6 +456,15 @@ class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):
    """Streaming event for incremental function call argument updates.

    :param delta: Incremental function call arguments being added
    :param item_id: Unique identifier of the function call being updated
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.function_call_arguments.delta"
    """

    delta: str
    item_id: str
    output_index: int

@@ -297,6 +474,15 @@ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):
    """Streaming event for when function call arguments are completed.

    :param arguments: Final complete arguments JSON string for the function call
    :param item_id: Unique identifier of the completed function call
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.function_call_arguments.done"
    """

    arguments: str  # final arguments of the function call
    item_id: str
    output_index: int

@@ -306,6 +492,14 @@ class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel):
    """Streaming event for web search calls in progress.

    :param item_id: Unique identifier of the web search call
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.web_search_call.in_progress"
    """

    item_id: str
    output_index: int
    sequence_number: int

@@ -322,6 +516,14 @@ class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel):
    """Streaming event for completed web search calls.

    :param item_id: Unique identifier of the completed web search call
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.web_search_call.completed"
    """

    item_id: str
    output_index: int
    sequence_number: int

@@ -366,6 +568,14 @@ class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):
    """Streaming event for MCP calls in progress.

    :param item_id: Unique identifier of the MCP call
    :param output_index: Index position of the item in the output list
    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.mcp_call.in_progress"
    """

    item_id: str
    output_index: int
    sequence_number: int

@@ -374,12 +584,24 @@ class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel):

@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel):
    """Streaming event for failed MCP calls.

    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.mcp_call.failed"
    """

    sequence_number: int
    type: Literal["response.mcp_call.failed"] = "response.mcp_call.failed"


@json_schema_type
class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel):
    """Streaming event for completed MCP calls.

    :param sequence_number: Sequential number for ordering streaming events
    :param type: Event type identifier, always "response.mcp_call.completed"
    """

    sequence_number: int
    type: Literal["response.mcp_call.completed"] = "response.mcp_call.completed"
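A minimal sketch of how a consumer might fold these streaming events into final text. The source of the stream is assumed; only the event `type` discriminators defined above are used.

# Illustrative sketch only; not part of this diff.
from collections.abc import AsyncIterator


async def collect_text(stream: AsyncIterator) -> str:
    # Accumulate output text deltas until the terminal "response.completed" event.
    chunks: list[str] = []
    async for event in stream:
        if event.type == "response.output_text.delta":
            chunks.append(event.delta)
        elif event.type == "response.completed":
            break
    return "".join(chunks)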
@@ -442,6 +664,12 @@ WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]

@json_schema_type
class OpenAIResponseInputToolWebSearch(BaseModel):
    """Web search tool configuration for OpenAI response inputs.

    :param type: Web search tool type variant to use
    :param search_context_size: (Optional) Size of search context, must be "low", "medium", or "high"
    """

    # Must match values of WebSearchToolTypes above
    type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
        "web_search"

@@ -453,6 +681,15 @@ class OpenAIResponseInputToolWebSearch(BaseModel):

@json_schema_type
class OpenAIResponseInputToolFunction(BaseModel):
    """Function tool configuration for OpenAI response inputs.

    :param type: Tool type identifier, always "function"
    :param name: Name of the function that can be called
    :param description: (Optional) Description of what the function does
    :param parameters: (Optional) JSON schema defining the function's parameters
    :param strict: (Optional) Whether to enforce strict parameter validation
    """

    type: Literal["function"] = "function"
    name: str
    description: str | None = None

@@ -462,6 +699,15 @@ class OpenAIResponseInputToolFunction(BaseModel):

@json_schema_type
class OpenAIResponseInputToolFileSearch(BaseModel):
    """File search tool configuration for OpenAI response inputs.

    :param type: Tool type identifier, always "file_search"
    :param vector_store_ids: List of vector store identifiers to search within
    :param filters: (Optional) Additional filters to apply to the search
    :param max_num_results: (Optional) Maximum number of search results to return (1-50)
    :param ranking_options: (Optional) Options for ranking and scoring search results
    """

    type: Literal["file_search"] = "file_search"
    vector_store_ids: list[str]
    filters: dict[str, Any] | None = None

@@ -470,16 +716,37 @@ class OpenAIResponseInputToolFileSearch(BaseModel):

class ApprovalFilter(BaseModel):
    """Filter configuration for MCP tool approval requirements.

    :param always: (Optional) List of tool names that always require approval
    :param never: (Optional) List of tool names that never require approval
    """

    always: list[str] | None = None
    never: list[str] | None = None


class AllowedToolsFilter(BaseModel):
    """Filter configuration for restricting which MCP tools can be used.

    :param tool_names: (Optional) List of specific tool names that are allowed
    """

    tool_names: list[str] | None = None


@json_schema_type
class OpenAIResponseInputToolMCP(BaseModel):
    """Model Context Protocol (MCP) tool configuration for OpenAI response inputs.

    :param type: Tool type identifier, always "mcp"
    :param server_label: Label to identify this MCP server
    :param server_url: URL endpoint of the MCP server
    :param headers: (Optional) HTTP headers to include when connecting to the server
    :param require_approval: Approval requirement for tool calls ("always", "never", or filter)
    :param allowed_tools: (Optional) Restriction on which tools can be used from this server
    """

    type: Literal["mcp"] = "mcp"
    server_label: str
    server_url: str
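A construction sketch for the input-tool models above. The import path, the server URL, and passing an ApprovalFilter for require_approval (a field whose definition is not shown in this hunk, only its docstring) are assumptions.

# Illustrative sketch only; not part of this diff.
from llama_stack.apis.agents.openai_responses import (
    ApprovalFilter,
    OpenAIResponseInputToolFileSearch,
    OpenAIResponseInputToolMCP,
)

tools = [
    # Search a named vector store; type defaults to "file_search".
    OpenAIResponseInputToolFileSearch(vector_store_ids=["vs_docs"]),
    # Attach an MCP server and require approval only for a sensitive tool.
    OpenAIResponseInputToolMCP(
        server_label="github",
        server_url="https://mcp.example.com/sse",
        require_approval=ApprovalFilter(always=["delete_repo"]),
    ),
]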
@@ -500,17 +767,37 @@ register_schema(OpenAIResponseInputTool, name="OpenAIResponseInputTool")

class ListOpenAIResponseInputItem(BaseModel):
    """List container for OpenAI response input items.

    :param data: List of input items
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseInput]
    object: Literal["list"] = "list"


@json_schema_type
class OpenAIResponseObjectWithInput(OpenAIResponseObject):
    """OpenAI response object extended with input context information.

    :param input: List of input items that led to this response
    """

    input: list[OpenAIResponseInput]


@json_schema_type
class ListOpenAIResponseObject(BaseModel):
    """Paginated list of OpenAI response objects with navigation metadata.

    :param data: List of response objects with their input context
    :param has_more: Whether there are more results available beyond this page
    :param first_id: Identifier of the first item in this page
    :param last_id: Identifier of the last item in this page
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseObjectWithInput]
    has_more: bool
    first_id: str
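A pagination sketch using the ListOpenAIResponseObject fields above. `list_responses` is a hypothetical callable returning ListOpenAIResponseObject pages, not an API defined in this diff.

# Illustrative sketch only; not part of this diff.
def iter_all_responses(list_responses):
    # Walk pages using has_more / last_id as cursor metadata.
    after = None
    while True:
        page = list_responses(after=after)
        yield from page.data
        if not page.has_more:
            break
        after = page.last_id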
Some files were not shown because too many files have changed in this diff.