Merge remote-tracking branch 'upstream/main' into patch-1

This commit is contained in:
Dean Wampler 2025-08-07 11:53:07 -04:00
commit 688f42ba3a
No known key found for this signature in database
586 changed files with 65959 additions and 3793 deletions

View file

@ -0,0 +1,82 @@
name: 'Run and Record Tests'
description: 'Run integration tests and handle recording/artifact upload'
inputs:
test-types:
description: 'JSON array of test types to run'
required: true
stack-config:
description: 'Stack configuration to use'
required: true
provider:
description: 'Provider to use for tests'
required: true
inference-mode:
description: 'Inference mode (record or replay)'
required: true
run-vision-tests:
description: 'Whether to run vision tests'
required: false
default: 'false'
runs:
using: 'composite'
steps:
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
shell: bash
run: |
free -h
df -h
- name: Run Integration Tests
shell: bash
run: |
./scripts/integration-tests.sh \
--stack-config '${{ inputs.stack-config }}' \
--provider '${{ inputs.provider }}' \
--test-types '${{ inputs.test-types }}' \
--inference-mode '${{ inputs.inference-mode }}' \
${{ inputs.run-vision-tests == 'true' && '--run-vision-tests' || '' }}
- name: Commit and push recordings
if: ${{ inputs.inference-mode == 'record' }}
shell: bash
run: |
echo "Checking for recording changes"
git status --porcelain tests/integration/recordings/
if [[ -n $(git status --porcelain tests/integration/recordings/) ]]; then
echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
git commit -m "Recordings update from CI (vision)"
else
git commit -m "Recordings update from CI"
fi
git fetch origin ${{ github.event.pull_request.head.ref }}
git rebase origin/${{ github.event.pull_request.head.ref }}
echo "Rebased successfully"
git push origin HEAD:${{ github.event.pull_request.head.ref }}
echo "Pushed successfully"
else
echo "No recording changes"
fi
- name: Write inference logs to file
if: ${{ always() }}
shell: bash
run: |
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
- name: Upload logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
path: |
*.log
retention-days: 1

View file

@ -1,11 +1,23 @@
name: Setup Ollama
description: Start Ollama
inputs:
run-vision-tests:
description: 'Run vision tests: "true" or "false"'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Start Ollama
shell: bash
run: |
docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models
if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
image="ollama-with-vision-model"
else
image="ollama-with-models"
fi
echo "Starting Ollama with image: $image"
docker run -d --name ollama -p 11434:11434 docker.io/llamastack/$image
echo "Verifying Ollama status..."
timeout 30 bash -c 'while ! curl -s -L http://127.0.0.1:11434; do sleep 1 && echo "."; done'

View file

@ -0,0 +1,51 @@
name: 'Setup Test Environment'
description: 'Common setup steps for integration tests including dependencies, providers, and build'
inputs:
python-version:
description: 'Python version to use'
required: true
client-version:
description: 'Client version (latest or published)'
required: true
provider:
description: 'Provider to setup (ollama or vllm)'
required: true
default: 'ollama'
run-vision-tests:
description: 'Whether to setup provider for vision tests'
required: false
default: 'false'
inference-mode:
description: 'Inference mode (record or replay)'
required: true
runs:
using: 'composite'
steps:
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
python-version: ${{ inputs.python-version }}
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
run-vision-tests: ${{ inputs.run-vision-tests }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
shell: bash
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Configure git for commits
shell: bash
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"

View file

@ -1,19 +1,19 @@
# Llama Stack CI
Llama Stack uses GitHub Actions for Continous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table detailing what CI the project includes and the purpose.
| Name | File | Purpose |
| ---- | ---- | ------- |
| Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
| Coverage Badge | [coverage-badge.yml](coverage-badge.yml) | Creates PR for updating the code coverage badge |
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
| Integration Auth Tests | [integration-auth-tests.yml](integration-auth-tests.yml) | Run the integration test suite with Kubernetes authentication |
| SqlStore Integration Tests | [integration-sql-store-tests.yml](integration-sql-store-tests.yml) | Run the integration test suite with SqlStore |
| Integration Tests | [integration-tests.yml](integration-tests.yml) | Run the integration test suite with Ollama |
| Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suite from tests/integration in replay mode |
| Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
| Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
| Close stale issues and PRs | [stale_bot.yml](stale_bot.yml) | Run the Stale Bot action |
| Test External Providers Installed via Module | [test-external-provider-module.yml](test-external-provider-module.yml) | Test External Provider installation via Python module |

View file

@ -1,62 +0,0 @@
name: Coverage Badge
run-name: Creates PR for updating the code coverage badge
on:
push:
branches: [ main ]
paths:
- 'llama_stack/**'
- 'tests/unit/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- '.github/workflows/unit-tests.yml'
- '.github/workflows/coverage-badge.yml' # This workflow
workflow_dispatch:
jobs:
unit-tests:
permissions:
contents: write # for peter-evans/create-pull-request to create branch
pull-requests: write # for peter-evans/create-pull-request to create a PR
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Run unit tests
run: |
./scripts/unit-tests.sh
- name: Coverage Badge
uses: tj-actions/coverage-badge-py@1788babcb24544eb5bbb6e0d374df5d1e54e670f # v2.0.4
- name: Verify Changed files
uses: tj-actions/verify-changed-files@a1c6acee9df209257a246f2cc6ae8cb6581c1edf # v20.0.4
id: verify-changed-files
with:
files: coverage.svg
- name: Commit files
if: steps.verify-changed-files.outputs.files_changed == 'true'
run: |
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add coverage.svg
git commit -m "Updated coverage.svg"
- name: Create Pull Request
if: steps.verify-changed-files.outputs.files_changed == 'true'
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
with:
token: ${{ secrets.GITHUB_TOKEN }}
title: "ci: [Automatic] Coverage Badge Update"
body: |
This PR updates the coverage badge based on the latest coverage report.
Automatically generated by the [workflow coverage-badge.yaml](.github/workflows/coverage-badge.yaml)
delete-branch: true

View file

@ -1,20 +1,22 @@
name: Integration Tests
name: Integration Tests (Replay)
run-name: Run the integration test suite with Ollama
run-name: Run the integration test suite from tests/integration in replay mode
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- '.github/workflows/integration-tests.yml' # This workflow
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
schedule:
# If changing the cron schedule, update the provider in the test-matrix job
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@ -31,129 +33,64 @@ on:
default: 'ollama'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
# Skip concurrency for pushes to main - each commit should be tested independently
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
cancel-in-progress: true
jobs:
discover-tests:
runs-on: ubuntu-latest
outputs:
test-type: ${{ steps.generate-matrix.outputs.test-type }}
test-types: ${{ steps.generate-test-types.outputs.test-types }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate test matrix
id: generate-matrix
- name: Generate test types
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
# NOTE: we are excluding post_training since the tests take too long
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases)$" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-type=$TEST_TYPES" >> $GITHUB_OUTPUT
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
test-matrix:
run-replay-mode-tests:
needs: discover-tests
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, vision={4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.run-vision-tests) }}
strategy:
fail-fast: false
matrix:
test-type: ${{ fromJson(needs.discover-tests.outputs.test-type) }}
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
python-version: ["3.12", "3.13"]
client-version: ${{ (github.event.schedule == '0 0 * * 0' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
exclude: # TODO: look into why these tests are failing and fix them
- provider: vllm
test-type: safety
- provider: vllm
test-type: post_training
- provider: vllm
test-type: tool_runtime
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
run-vision-tests: [true, false]
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
run-vision-tests: ${{ matrix.run-vision-tests }}
inference-mode: 'replay'
- name: Setup ollama
if: ${{ matrix.provider == 'ollama' }}
uses: ./.github/actions/setup-ollama
- name: Setup vllm
if: ${{ matrix.provider == 'vllm' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Run Integration Tests
env:
LLAMA_STACK_CLIENT_TIMEOUT: "300" # Increased timeout for eval operations
# Use 'shell' to get pipefail behavior
# https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#exit-codes-and-error-action-preference
# TODO: write a precommit hook to detect if a test contains a pipe but does not use 'shell: bash'
shell: bash
run: |
if [ "${{ matrix.client-type }}" == "library" ]; then
stack_config="ci-tests"
else
stack_config="server:ci-tests"
fi
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
if [ "${{ matrix.provider }}" == "ollama" ]; then
export OLLAMA_URL="http://0.0.0.0:11434"
export TEXT_MODEL=ollama/llama3.2:3b-instruct-fp16
export SAFETY_MODEL="ollama/llama-guard3:1b"
EXTRA_PARAMS="--safety-shield=llama-guard"
else
export VLLM_URL="http://localhost:8000/v1"
export TEXT_MODEL=vllm/meta-llama/Llama-3.2-1B-Instruct
# TODO: remove the not(test_inference_store_tool_calls) once we can get the tool called consistently
EXTRA_PARAMS=
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-k "not( ${EXCLUDE_TESTS} )" \
--text-model=$TEXT_MODEL \
--embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
--color=yes ${EXTRA_PARAMS} \
--capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}
run: |
free -h
df -h
- name: Write inference logs to file
if: ${{ always() }}
run: |
sudo docker logs ollama > ollama.log || true
sudo docker logs vllm > vllm.log || true
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.provider }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}-${{ matrix.client-version }}
path: |
*.log
retention-days: 1
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
inference-mode: 'replay'
run-vision-tests: ${{ matrix.run-vision-tests }}

View file

@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector"]
vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "remote::chromadb", "remote::pgvector", "remote::weaviate", "remote::qdrant"]
python-version: ["3.12", "3.13"]
fail-fast: false # we want to run all tests regardless of failure
@ -48,6 +48,14 @@ jobs:
-e ANONYMIZED_TELEMETRY=FALSE \
chromadb/chroma:latest
- name: Setup Weaviate
if: matrix.vector-io-provider == 'remote::weaviate'
run: |
docker run --rm -d --pull always \
--name weaviate \
-p 8080:8080 -p 50051:50051 \
cr.weaviate.io/semitechnologies/weaviate:1.32.0
- name: Start PGVector DB
if: matrix.vector-io-provider == 'remote::pgvector'
run: |
@ -78,6 +86,29 @@ jobs:
PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
-c "CREATE EXTENSION IF NOT EXISTS vector;"
- name: Setup Qdrant
if: matrix.vector-io-provider == 'remote::qdrant'
run: |
docker run --rm -d --pull always \
--name qdrant \
-p 6333:6333 \
qdrant/qdrant
- name: Wait for Qdrant to be ready
if: matrix.vector-io-provider == 'remote::qdrant'
run: |
echo "Waiting for Qdrant to be ready..."
for i in {1..30}; do
if curl -s http://localhost:6333/collections | grep -q '"status":"ok"'; then
echo "Qdrant is ready!"
exit 0
fi
sleep 2
done
echo "Qdrant failed to start"
docker logs qdrant
exit 1
- name: Wait for ChromaDB to be ready
if: matrix.vector-io-provider == 'remote::chromadb'
run: |
@ -93,6 +124,21 @@ jobs:
docker logs chromadb
exit 1
- name: Wait for Weaviate to be ready
if: matrix.vector-io-provider == 'remote::weaviate'
run: |
echo "Waiting for Weaviate to be ready..."
for i in {1..30}; do
if curl -s http://localhost:8080 | grep -q "https://weaviate.io/developers/weaviate/current/"; then
echo "Weaviate is ready!"
exit 0
fi
sleep 2
done
echo "Weaviate failed to start"
docker logs weaviate
exit 1
- name: Build Llama Stack
run: |
uv run llama stack build --template ci-tests --image-type venv
@ -113,6 +159,10 @@ jobs:
PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
ENABLE_QDRANT: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'true' || '' }}
QDRANT_URL: ${{ matrix.vector-io-provider == 'remote::qdrant' && 'http://localhost:6333' || '' }}
ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
run: |
uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
tests/integration/vector_io \
@ -134,6 +184,11 @@ jobs:
run: |
docker logs chromadb > chromadb.log
- name: Write Qdrant logs to file
if: ${{ always() && matrix.vector-io-provider == 'remote::qdrant' }}
run: |
docker logs qdrant > qdrant.log
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2

View file

@ -9,20 +9,20 @@ on:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
pull_request:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/distribution/build.*'
- 'llama_stack/distribution/*.sh'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/templates/**'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
concurrency:
@ -33,23 +33,23 @@ jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
templates: ${{ steps.set-matrix.outputs.templates }}
distros: ${{ steps.set-matrix.outputs.distros }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate Template List
- name: Generate Distribution List
id: set-matrix
run: |
templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "templates=$templates" >> "$GITHUB_OUTPUT"
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"
build:
needs: generate-matrix
runs-on: ubuntu-latest
strategy:
matrix:
template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
image-type: [venv, container]
fail-fast: false # We want to run all jobs even if some fail
@ -62,13 +62,13 @@ jobs:
- name: Print build dependencies
run: |
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
- name: Run Llama Stack Build
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
- name: Print dependencies in the image
if: matrix.image-type == 'venv'
@ -99,16 +99,16 @@ jobs:
- name: Build a single provider
run: |
yq -i '.image_type = "container"' llama_stack/templates/ci-tests/build.yaml
yq -i '.image_name = "test"' llama_stack/templates/ci-tests/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
- name: Inspect the container image entrypoint
run: |
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi
@ -122,27 +122,27 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Pin template to UBI9 base
- name: Pin distribution to UBI9 base
run: |
yq -i '
.image_type = "container" |
.image_name = "ubi9-test" |
.distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
' llama_stack/templates/ci-tests/build.yaml
' llama_stack/distributions/ci-tests/build.yaml
- name: Build dev container (UBI9)
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
run: |
uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
- name: Inspect UBI9 image
run: |
IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
echo "Entrypoint is not correct"
exit 1
fi

View file

@ -0,0 +1,109 @@
name: Integration Tests (Record)
run-name: Run the integration test suite from tests/integration
on:
pull_request:
branches: [ main ]
types: [opened, synchronize, labeled]
paths:
- 'llama_stack/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- '.github/workflows/record-integration-tests.yml' # This workflow
- '.github/actions/setup-ollama/action.yml'
- '.github/actions/setup-test-environment/action.yml'
- '.github/actions/run-and-record-tests/action.yml'
workflow_dispatch:
inputs:
test-provider:
description: 'Test against a specific provider'
type: string
default: 'ollama'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
discover-tests:
if: contains(github.event.pull_request.labels.*.name, 're-record-tests') ||
contains(github.event.pull_request.labels.*.name, 're-record-vision-tests')
runs-on: ubuntu-latest
outputs:
test-types: ${{ steps.generate-test-types.outputs.test-types }}
matrix-modes: ${{ steps.generate-test-types.outputs.matrix-modes }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate test types
id: generate-test-types
run: |
# Get test directories dynamically, excluding non-test directories
TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" |
grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" |
sort | jq -R -s -c 'split("\n")[:-1]')
echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT
labels=$(gh pr view ${{ github.event.pull_request.number }} --json labels --jq '.labels[].name')
echo "labels=$labels"
modes_array=()
if [[ $labels == *"re-record-vision-tests"* ]]; then
modes_array+=("vision")
fi
if [[ $labels == *"re-record-tests"* ]]; then
modes_array+=("non-vision")
fi
# Convert to JSON array
if [ ${#modes_array[@]} -eq 0 ]; then
matrix_modes="[]"
else
matrix_modes=$(printf '%s\n' "${modes_array[@]}" | jq -R -s -c 'split("\n")[:-1]')
fi
echo "matrix_modes=$matrix_modes"
echo "matrix-modes=$matrix_modes" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
record-tests:
needs: discover-tests
runs-on: ubuntu-latest
permissions:
contents: write
strategy:
fail-fast: false
matrix:
mode: ${{ fromJSON(needs.discover-tests.outputs.matrix-modes) }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ github.event.pull_request.head.ref }}
fetch-depth: 0
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }}
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}
inference-mode: 'record'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
test-types: ${{ needs.discover-tests.outputs.test-types }}
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }}
inference-mode: 'record'
run-vision-tests: ${{ matrix.mode == 'vision' && 'true' || 'false' }}

View file

@ -12,12 +12,13 @@ on:
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
- 'requirements.txt'
- 'tests/external/*'
- '.github/workflows/test-external-provider-module.yml' # This workflow
jobs:
test-external-providers-from-module:
# This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
if: false
runs-on: ubuntu-latest
strategy:
matrix:
@ -47,7 +48,7 @@ jobs:
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'

View file

@ -43,11 +43,11 @@ jobs:
- name: Print distro dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'

View file

@ -451,7 +451,7 @@ GenAI application developers need more than just an LLM - they need to integrate
Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety.
With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stacks plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.
With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stacks plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.
## Release
After iterating on the APIs for the last 3 months, today were launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.

View file

@ -157,6 +157,7 @@ uv sync
that describes the configuration. These descriptions will be used to generate the provider
documentation.
* When possible, use keyword arguments only when calling functions.
* Llama Stack utilizes [custom Exception classes](llama_stack/apis/common/errors.py) for certain Resources that should be used where applicable.
## Common Tasks
@ -164,7 +165,7 @@ Some tips about common tasks you work on while contributing to Llama Stack:
### Using `llama stack build`
Building a stack image (conda / docker) will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
Example:
```bash
@ -172,7 +173,7 @@ cd work/
git clone https://github.com/meta-llama/llama-stack.git
git clone https://github.com/meta-llama/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
```
### Updating distribution configurations

View file

@ -1,9 +1,9 @@
include pyproject.toml
include llama_stack/models/llama/llama3/tokenizer.model
include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/distribution/*.sh
include llama_stack/core/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/templates/*/*.yaml
include llama_stack/distributions/*/*.yaml
include llama_stack/providers/tests/test_cases/inference/*.json
include llama_stack/models/llama/*/*.md
include llama_stack/tests/integration/*.jpg

View file

@ -6,7 +6,6 @@
[![Discord](https://img.shields.io/discord/1257833999603335178?color=6A7EC2&logo=discord&logoColor=ffffff)](https://discord.gg/llama-stack)
[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
![coverage badge](./coverage.svg)
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
@ -112,7 +111,7 @@ Here is a list of the various API providers and available distributions that can
Please checkout for [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html)
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
@ -124,6 +123,10 @@ Please checkout for [full list](https://llama-stack.readthedocs.io/en/latest/pro
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -123,7 +123,7 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
"!uv run --with llama-stack llama stack build --template together --image-type venv \n",
"!uv run --with llama-stack llama stack build --distro together --image-type venv \n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -165,7 +165,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{

View file

@ -233,7 +233,7 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server \n",
"!uv run --with llama-stack llama stack build --template meta-reference-gpu --image-type venv \n",
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv \n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -275,7 +275,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{

View file

@ -223,7 +223,7 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server \n",
"!uv run --with llama-stack llama stack build --template llama_api --image-type venv \n",
"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv \n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -265,7 +265,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{

View file

@ -37,7 +37,7 @@
"\n",
"To learn more about torchtune: https://github.com/pytorch/torchtune\n",
"\n",
"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/experimental-post-training) as the distribution template\n",
"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/experimental-post-training) as the distribution template\n",
"\n",
"#### 0.0. Prerequisite: Have an OpenAI API key\n",
"In this showcase, we will use [braintrust](https://www.braintrust.dev/) as scoring provider for eval and it uses OpenAI model as judge model for scoring. So, you need to get an API key from [OpenAI developer platform](https://platform.openai.com/docs/overview).\n",
@ -2864,7 +2864,7 @@
}
],
"source": [
"!llama stack build --template experimental-post-training --image-type venv --image-name __system__"
"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
]
},
{
@ -3216,19 +3216,19 @@
"INFO:datasets:Duckdb version 1.1.3 available.\n",
"INFO:datasets:TensorFlow version 2.18.0 available.\n",
"INFO:datasets:JAX version 0.4.33 available.\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.distribution.stack:\n"
"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
"INFO:llama_stack.core.stack:\n"
]
},
{
@ -3448,7 +3448,7 @@
"\n",
"os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
"_ = client.initialize()"
]

View file

@ -38,7 +38,7 @@
"source": [
"# NBVAL_SKIP\n",
"!pip install -U llama-stack\n",
"!UV_SYSTEM_PYTHON=1 llama stack build --template fireworks --image-type venv"
"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
]
},
{
@ -48,7 +48,7 @@
"outputs": [],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",

View file

@ -57,7 +57,7 @@
"outputs": [],
"source": [
"# NBVAL_SKIP\n",
"!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
]
},
{
@ -661,7 +661,7 @@
"except ImportError:\n",
" print(\"Not in Google Colab environment\")\n",
"\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"together\")\n",
"_ = client.initialize()"

View file

@ -35,7 +35,7 @@
],
"source": [
"from llama_stack_client import LlamaStackClient, Agent\n",
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"from rich.pretty import pprint\n",
"import json\n",
"import uuid\n",

View file

@ -92,7 +92,7 @@
"metadata": {},
"source": [
"```bash\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
"```"
]
},
@ -194,7 +194,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"

View file

@ -81,7 +81,7 @@
"metadata": {},
"source": [
"```bash\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
"```"
]
},

View file

@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"

View file

@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"

View file

@ -56,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
"\n",
"client = LlamaStackAsLibraryClient(\"nvidia\")\n",
"client.initialize()"

View file

@ -1 +1 @@
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.

View file

@ -17,7 +17,7 @@ import fire
import ruamel.yaml as yaml
from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
from llama_stack.distribution.stack import LlamaStack # noqa: E402
from llama_stack.core.stack import LlamaStack # noqa: E402
from .pyopenapi.options import Options # noqa: E402
from .pyopenapi.specification import Info, Server # noqa: E402

View file

@ -12,7 +12,7 @@ from typing import TextIO
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
from llama_stack.distribution.resolver import api_protocol_map
from llama_stack.core.resolver import api_protocol_map
from .generator import Generator
from .options import Options

View file

@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.
## Limitations

View file

@ -145,12 +145,12 @@
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --template starter --image-type venv\n",
"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"uv run --with llama-stack llama stack run starter --image-type venv --env INFERENCE_MODEL=llama3.2:3b\",\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
@ -187,7 +187,7 @@
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::meta-reference
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# remote::nvidia
## Description

View file

@ -43,7 +43,7 @@ We have built-in functionality to run the supported open-benckmarks using llama-
Spin up llama stack server with 'open-benchmark' template
```
llama stack run llama_stack/templates/open-benchmark/run.yaml
llama stack run llama_stack/distributions/open-benchmark/run.yaml
```

View file

@ -23,7 +23,7 @@ To use the HF SFTTrainer in your Llama Stack project, follow these steps:
You can access the HuggingFace trainer via the `ollama` distribution:
```bash
llama stack build --template starter --image-type venv
llama stack build --distro starter --image-type venv
llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml
```

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::huggingface
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::torchtune
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# remote::nvidia
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::basic
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::braintrust
## Description

View file

@ -1,3 +1,7 @@
---
orphan: true
---
# inline::llm-as-judge
## Description

View file

@ -355,7 +355,7 @@ server:
8. Run the server:
```bash
python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
```
9. Test the API:

View file

@ -97,11 +97,11 @@ To start the Llama Stack Playground, run the following commands:
1. Start up the Llama Stack API server
```bash
llama stack build --template together --image-type conda
llama stack build --distro together --image-type venv
llama stack run together
```
2. Start Streamlit UI
```bash
uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
```

View file

@ -11,4 +11,5 @@ See the [Adding a New API Provider](new_api_provider.md) which describes how to
:hidden:
new_api_provider
testing
```

View file

@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
- Update any distribution {repopath}`Templates::llama_stack/templates/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
- Update any distribution {repopath}`Templates::llama_stack/distributions/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
Here are some example PRs to help you get started:
@ -52,7 +52,7 @@ def get_base_url(self) -> str:
## Testing the Provider
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --template together`.
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
### 1. Integration Testing

View file

@ -174,7 +174,7 @@ spec:
- name: llama-stack
image: localhost/llama-stack-run-k8s:latest
imagePullPolicy: IfNotPresent
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
ports:
- containerPort: 5000
volumeMounts:

View file

@ -47,30 +47,37 @@ pip install -e .
```
Use the CLI to build your distribution.
The main points to consider are:
1. **Image Type** - Do you want a Conda / venv environment or a Container (eg. Docker)
1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
3. **Config** - Do you want to use a pre-existing config file to build your distribution?
```
llama stack build -h
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--list-templates] [--image-type {conda,container,venv}] [--image-name IMAGE_NAME] [--print-deps-only] [--run]
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only]
[--run] [--providers PROVIDERS]
Build a Llama stack container
options:
-h, --help show this help message and exit
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
be prompted to enter information interactively (default: None)
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
--image-type {conda,container,venv}
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to
enter information interactively (default: None)
--template TEMPLATE (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default:
None)
--distro DISTRIBUTION, --distribution DISTRIBUTION
Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
--list-distros, --list-distributions
Show the available distributions for building a Llama Stack distribution (default: False)
--image-type {container,venv}
Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
--image-name IMAGE_NAME
[for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active environment will be used if
found. (default: None)
[for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default:
None)
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
--providers PROVIDERS
Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per
API. (default: None)
```
After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
@ -141,7 +148,7 @@ You may then pick a template to build your distribution with providers fitted to
For example, to build a distribution with TGI as the inference provider, you can run:
```
$ llama stack build --template starter
$ llama stack build --distro starter
...
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
```
@ -159,7 +166,7 @@ It would be best to start with a template and understand the structure of the co
llama stack build
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
> Enter the image type you want your Llama Stack to be built as (container or venv): venv
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
@ -184,10 +191,10 @@ You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack
:::{tab-item} Building from a pre-existing build config file
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
- The config file will be of contents like the ones in `llama_stack/templates/*build.yaml`.
- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
```
llama stack build --config llama_stack/templates/starter/build.yaml
llama stack build --config llama_stack/distributions/starter/build.yaml
```
:::
@ -253,11 +260,11 @@ Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podm
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
```
llama stack build --template starter --image-type container
llama stack build --distro starter --image-type container
```
```
$ llama stack build --template starter --image-type container
$ llama stack build --distro starter --image-type container
...
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
...
@ -312,7 +319,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
```
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
[--image-type {conda,venv}] [--enable-ui]
[--image-type {venv}] [--enable-ui]
[config | template]
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
@ -326,8 +333,8 @@ options:
--image-name IMAGE_NAME
Name of the image to run. Defaults to the current environment (default: None)
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
--image-type {conda,venv}
Image Type used during the build. This can be either conda or venv. (default: None)
--image-type {venv}
Image Type used during the build. This should be venv. (default: None)
--enable-ui Start the UI server (default: False)
```
@ -342,9 +349,6 @@ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-
# Start using a venv
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
# Start using a conda environment
llama stack run --image-type conda ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```
```

View file

@ -10,7 +10,6 @@ The default `run.yaml` files generated by templates are starting points for your
```yaml
version: 2
conda_env: ollama
apis:
- agents
- inference

View file

@ -6,11 +6,11 @@ This avoids the overhead of setting up a server.
```bash
# setup
uv pip install llama-stack
llama stack build --template starter --image-type venv
llama stack build --distro starter --image-type venv
```
```python
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack.core.library_client import LlamaStackAsLibraryClient
client = LlamaStackAsLibraryClient(
"starter",

View file

@ -9,6 +9,7 @@ This section provides an overview of the distributions available in Llama Stack.
list_of_distributions
building_distro
customizing_run_yaml
starting_llama_stack_server
importing_as_library
configuration
```

View file

@ -34,6 +34,13 @@ data:
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard

View file

@ -52,7 +52,7 @@ spec:
value: "${SAFETY_MODEL}"
- name: TAVILY_SEARCH_API_KEY
value: "${TAVILY_SEARCH_API_KEY}"
command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
ports:
- containerPort: 8321
volumeMounts:

View file

@ -31,6 +31,13 @@ providers:
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard

View file

@ -56,12 +56,12 @@ Breaking down the demo app, this section will show the core pieces that are used
### Setup Remote Inferencing
Start a Llama Stack server on localhost. Here is an example of how you can do this using the firework.ai distribution:
```
conda create -n stack-fireworks python=3.10
conda activate stack-fireworks
uv venv starter --python 3.12
source starter/bin/activate # On Windows: starter\Scripts\activate
pip install --no-cache llama-stack==0.2.2
llama stack build --template fireworks --image-type conda
llama stack build --distro starter --image-type venv
export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run fireworks --port 5050
llama stack run starter --port 5050
```
Ensure the Llama Stack server version is the same as the Kotlin SDK Library for maximum compatibility.

View file

@ -57,7 +57,7 @@ Make sure you have access to a watsonx API Key. You can get one by referring [wa
## Running Llama Stack with watsonx
You can do this via Conda (build code), venv or Docker which has a pre-built image.
You can do this via venv or Docker which has a pre-built image.
### Via Docker
@ -76,13 +76,3 @@ docker run \
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
--env WATSONX_BASE_URL=$WATSONX_BASE_URL
```
### Via Conda
```bash
llama stack build --template watsonx --image-type conda
llama stack run ./run.yaml \
--port $LLAMA_STACK_PORT \
--env WATSONX_API_KEY=$WATSONX_API_KEY \
--env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
```

View file

@ -114,7 +114,7 @@ podman run --rm -it \
## Running Llama Stack
Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via venv or Docker which has a pre-built image.
### Via Docker
@ -153,7 +153,7 @@ docker run \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
-v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
llamastack/distribution-dell \
--config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \
@ -164,12 +164,12 @@ docker run \
--env CHROMA_URL=$CHROMA_URL
```
### Via Conda
### Via venv
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
```bash
llama stack build --template dell --image-type conda
llama stack build --distro dell --image-type venv
llama stack run dell
--port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=$INFERENCE_MODEL \

View file

@ -70,7 +70,7 @@ $ llama model list --downloaded
## Running the Distribution
You can do this via Conda (build code) or Docker which has a pre-built image.
You can do this via venv or Docker which has a pre-built image.
### Via Docker
@ -104,12 +104,12 @@ docker run \
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
```
### Via Conda
### Via venv
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
```bash
llama stack build --template meta-reference-gpu --image-type conda
llama stack build --distro meta-reference-gpu --image-type venv
llama stack run distributions/meta-reference-gpu/run.yaml \
--port 8321 \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

View file

@ -133,7 +133,7 @@ curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-inst
## Running Llama Stack with NVIDIA
You can do this via Conda or venv (build code), or Docker which has a pre-built image.
You can do this via venv (build code), or Docker which has a pre-built image.
### Via Docker
@ -152,24 +152,13 @@ docker run \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
```
### Via Conda
```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
llama stack build --template nvidia --image-type conda
llama stack run ./run.yaml \
--port 8321 \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
--env INFERENCE_MODEL=$INFERENCE_MODEL
```
### Via venv
If you've set up your local development environment, you can also build the image using your local virtual environment.
```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
llama stack build --template nvidia --image-type venv
llama stack build --distro nvidia --image-type venv
llama stack run ./run.yaml \
--port 8321 \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \

View file

@ -100,10 +100,6 @@ The following environment variables can be configured:
### Model Configuration
- `INFERENCE_MODEL`: HuggingFace model for serverless inference
- `INFERENCE_ENDPOINT_NAME`: HuggingFace endpoint name
- `OLLAMA_INFERENCE_MODEL`: Ollama model name
- `OLLAMA_EMBEDDING_MODEL`: Ollama embedding model name
- `OLLAMA_EMBEDDING_DIMENSION`: Ollama embedding dimension (default: `384`)
- `VLLM_INFERENCE_MODEL`: vLLM model name
### Vector Database Configuration
- `SQLITE_STORE_DIR`: SQLite store directory (default: `~/.llama/distributions/starter`)
@ -127,47 +123,29 @@ The following environment variables can be configured:
## Enabling Providers
You can enable specific providers by setting their provider ID to a valid value using environment variables. This is useful when you want to use certain providers or don't have the required API keys.
You can enable specific providers by setting appropriate environment variables. For example,
### Examples of Enabling Providers
#### Enable FAISS Vector Provider
```bash
export ENABLE_FAISS=faiss
# self-hosted
export OLLAMA_URL=http://localhost:11434 # enables the Ollama inference provider
export VLLM_URL=http://localhost:8000/v1 # enables the vLLM inference provider
export TGI_URL=http://localhost:8000/v1 # enables the TGI inference provider
# cloud-hosted requiring API key configuration on the server
export CEREBRAS_API_KEY=your_cerebras_api_key # enables the Cerebras inference provider
export NVIDIA_API_KEY=your_nvidia_api_key # enables the NVIDIA inference provider
# vector providers
export MILVUS_URL=http://localhost:19530 # enables the Milvus vector provider
export CHROMADB_URL=http://localhost:8000/v1 # enables the ChromaDB vector provider
export PGVECTOR_DB=llama_stack_db # enables the PGVector vector provider
```
#### Enable Ollama Models
```bash
export ENABLE_OLLAMA=ollama
```
#### Disable vLLM Models
```bash
export VLLM_INFERENCE_MODEL=__disabled__
```
#### Disable Optional Vector Providers
```bash
export ENABLE_SQLITE_VEC=__disabled__
export ENABLE_CHROMADB=__disabled__
export ENABLE_PGVECTOR=__disabled__
```
### Provider ID Patterns
The starter distribution uses several patterns for provider IDs:
1. **Direct provider IDs**: `faiss`, `ollama`, `vllm`
2. **Environment-based provider IDs**: `${env.ENABLE_SQLITE_VEC:+sqlite-vec}`
3. **Model-based provider IDs**: `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`
When using the `+` pattern (like `${env.ENABLE_SQLITE_VEC+sqlite-vec}`), the provider is enabled by default and can be disabled by setting the environment variable to `__disabled__`.
When using the `:` pattern (like `${env.OLLAMA_INFERENCE_MODEL:__disabled__}`), the provider is disabled by default and can be enabled by setting the environment variable to a valid value.
This distribution comes with a default "llama-guard" shield that can be enabled by setting the `SAFETY_MODEL` environment variable to point to an appropriate Llama Guard model id. Use `llama-stack-client models list` to see the list of available models.
## Running the Distribution
You can run the starter distribution via Docker, Conda, or venv.
You can run the starter distribution via Docker or venv.
### Via Docker
@ -186,12 +164,12 @@ docker run \
--port $LLAMA_STACK_PORT
```
### Via Conda or venv
### Via venv
Ensure you have configured the starter distribution using the environment variables explained above.
```bash
uv run --with llama-stack llama stack build --template starter --image-type <conda|venv> --run
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```
## Example Usage

View file

@ -11,12 +11,6 @@ This is the simplest way to get started. Using Llama Stack as a library means yo
Another simple way to start interacting with Llama Stack is to just spin up a container (via Docker or Podman) which is pre-built with all the providers you need. We provide a number of pre-built images so you can start a Llama Stack server instantly. You can also build your own custom container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](selection) for more details.
## Conda:
If you have a custom or an advanced setup or you are developing on Llama Stack you can also build a custom Llama Stack server. Using `llama stack build` and `llama stack run` you can build/run a custom Llama Stack server containing the exact combination of providers you wish. We have also provided various templates to make getting started easier. See [Building a Custom Distribution](building_distro) for more details.
## Kubernetes:
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally. See [Kubernetes Deployment Guide](kubernetes_deployment) for more details.

View file

@ -59,10 +59,10 @@ Now let's build and run the Llama Stack config for Ollama.
We use `starter` as template. By default all providers are disabled, this requires enable ollama by passing environment variables.
```bash
llama stack build --template starter --image-type venv --run
llama stack build --distro starter --image-type venv --run
```
:::
:::{tab-item} Using `conda`
:::{tab-item} Using `venv`
You can use Python to build and run the Llama Stack server, which is useful for testing and development.
Llama Stack uses a [YAML configuration file](../distributions/configuration.md) to specify the stack setup,
@ -70,7 +70,7 @@ which defines the providers and their settings.
Now let's build and run the Llama Stack config for Ollama.
```bash
llama stack build --template starter --image-type conda --run
llama stack build --distro starter --image-type venv --run
```
:::
:::{tab-item} Using a Container
@ -150,13 +150,7 @@ pip install llama-stack-client
```
:::
:::{tab-item} Install with `conda`
```bash
yes | conda create -n stack-client python=3.12
conda activate stack-client
pip install llama-stack-client
```
:::
::::
Now let's use the `llama-stack-client` [CLI](../references/llama_stack_client_cli_reference.md) to check the

View file

@ -16,10 +16,13 @@ as the inference [provider](../providers/inference/index) for a Llama Model.
```bash
ollama run llama3.2:3b --keepalive 60m
```
#### Step 2: Run the Llama Stack server
We will use `uv` to run the Llama Stack server.
```bash
uv run --with llama-stack llama stack build --template starter --image-type venv --run
OLLAMA_URL=http://localhost:11434 \
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.

View file

@ -1,5 +1,13 @@
# Agents Providers
# Agents
## Overview
This section contains documentation for all available providers for the **agents** API.
- [inline::meta-reference](inline_meta-reference.md)
## Providers
```{toctree}
:maxdepth: 1
inline_meta-reference
```

View file

@ -1,7 +1,15 @@
# Datasetio Providers
# Datasetio
## Overview
This section contains documentation for all available providers for the **datasetio** API.
- [inline::localfs](inline_localfs.md)
- [remote::huggingface](remote_huggingface.md)
- [remote::nvidia](remote_nvidia.md)
## Providers
```{toctree}
:maxdepth: 1
inline_localfs
remote_huggingface
remote_nvidia
```

View file

@ -1,6 +1,14 @@
# Eval Providers
# Eval
## Overview
This section contains documentation for all available providers for the **eval** API.
- [inline::meta-reference](inline_meta-reference.md)
- [remote::nvidia](remote_nvidia.md)
## Providers
```{toctree}
:maxdepth: 1
inline_meta-reference
remote_nvidia
```

View file

@ -1,9 +1,4 @@
# External Providers Guide
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
# Creating External Providers
## Configuration
@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
2. **Inline Providers**: Providers that run locally within the Llama Stack process
## Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
### Remote Provider Specification
Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
- `provider_data_validator`: Optional validator for provider data
- `container_image`: Optional container image to use instead of pip packages
## Required Implementation
## Required Fields
## All Providers
### All Providers
All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
)
```
### Remote Providers
#### Remote Providers
Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -162,7 +146,7 @@ async def get_adapter_impl(
return OllamaInferenceAdapter(config)
```
### Inline Providers
#### Inline Providers
Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
1. `config`: An instance of the provider's config class
@ -189,7 +173,40 @@ Version: 0.1.0
Location: /path/to/venv/lib/python3.10/site-packages
```
## Example using `external_providers_dir`: Custom Ollama Provider
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
## Examples
### Example using `external_providers_dir`: Custom Ollama Provider
Here's a complete example of creating and using a custom Ollama provider:
@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
The provider will now be available in Llama Stack with the type `remote::custom_ollama`.
## Example using `module`: ramalama-stack
### Example using `module`: ramalama-stack
[ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
@ -267,34 +284,3 @@ additional_pip_packages:
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
The provider will now be available in Llama Stack with the type `remote::ramalama`.
## Best Practices
1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
3. **Dependencies**: Only include the minimum required dependencies in your provider package.
4. **Documentation**: Include clear documentation in your provider package about:
- Installation requirements
- Configuration options
- Usage examples
- Any limitations or known issues
5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
You can refer to the [integration tests
guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
information. Execute the test for the Provider type you are developing.
## Troubleshooting
If your external provider isn't being loaded:
1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
1. Check that the `external_providers_dir` path is correct and accessible.
2. Verify that the YAML files are properly formatted.
3. Ensure all required Python packages are installed.
4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
information using `LLAMA_STACK_LOGGING=all=debug`.
5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.

View file

@ -0,0 +1,10 @@
# Known External Providers
Here's a list of known external providers that you can use with Llama Stack:
| Name | Description | API | Type | Repository |
|------|-------------|-----|------|------------|
| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |

13
docs/source/providers/external/index.md vendored Normal file
View file

@ -0,0 +1,13 @@
# External Providers
Llama Stack supports external providers that live outside of the main codebase. This allows you to:
- Create and maintain your own providers independently
- Share providers with others without contributing to the main codebase
- Keep provider-specific code separate from the core Llama Stack code
```{toctree}
:maxdepth: 1
external-providers-list
external-providers-guide
```

View file

@ -1,5 +1,13 @@
# Files Providers
# Files
## Overview
This section contains documentation for all available providers for the **files** API.
- [inline::localfs](inline_localfs.md)
## Providers
```{toctree}
:maxdepth: 1
inline_localfs
```

View file

@ -8,7 +8,7 @@ Local filesystem-based file storage provider for managing files and documents lo
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `storage_dir` | `<class 'str'>` | No | PydanticUndefined | Directory to store uploaded files |
| `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |

View file

@ -1,4 +1,4 @@
# API Providers Overview
# API Providers
The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),
@ -12,81 +12,17 @@ Providers come in two flavors:
Importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally.
## External Providers
Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently.
```{toctree}
:maxdepth: 1
external.md
```
```{include} openai.md
:start-after: ## OpenAI API Compatibility
```
## Inference
Runs inference with an LLM.
```{toctree}
:maxdepth: 1
external/index
openai
inference/index
```
## Agents
Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.
```{toctree}
:maxdepth: 1
agents/index
```
## DatasetIO
Interfaces with datasets and data loaders.
```{toctree}
:maxdepth: 1
datasetio/index
```
## Safety
Applies safety policies to the output at a Systems (not only model) level.
```{toctree}
:maxdepth: 1
safety/index
```
## Telemetry
Collects telemetry data from the system.
```{toctree}
:maxdepth: 1
telemetry/index
```
## Vector IO
Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents.
Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector
io and database are used to store and retrieve documents for retrieval.
```{toctree}
:maxdepth: 1
vector_io/index
```
## Tool Runtime
Is associated with the ToolGroup resources.
```{toctree}
:maxdepth: 1
tool_runtime/index
files/index
```

View file

@ -1,26 +1,34 @@
# Inference Providers
# Inference
## Overview
This section contains documentation for all available providers for the **inference** API.
- [inline::meta-reference](inline_meta-reference.md)
- [inline::sentence-transformers](inline_sentence-transformers.md)
- [remote::anthropic](remote_anthropic.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::cerebras](remote_cerebras.md)
- [remote::databricks](remote_databricks.md)
- [remote::fireworks](remote_fireworks.md)
- [remote::gemini](remote_gemini.md)
- [remote::groq](remote_groq.md)
- [remote::hf::endpoint](remote_hf_endpoint.md)
- [remote::hf::serverless](remote_hf_serverless.md)
- [remote::llama-openai-compat](remote_llama-openai-compat.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::ollama](remote_ollama.md)
- [remote::openai](remote_openai.md)
- [remote::passthrough](remote_passthrough.md)
- [remote::runpod](remote_runpod.md)
- [remote::sambanova](remote_sambanova.md)
- [remote::tgi](remote_tgi.md)
- [remote::together](remote_together.md)
- [remote::vllm](remote_vllm.md)
- [remote::watsonx](remote_watsonx.md)
## Providers
```{toctree}
:maxdepth: 1
inline_meta-reference
inline_sentence-transformers
remote_anthropic
remote_bedrock
remote_cerebras
remote_databricks
remote_fireworks
remote_gemini
remote_groq
remote_hf_endpoint
remote_hf_serverless
remote_llama-openai-compat
remote_nvidia
remote_ollama
remote_openai
remote_passthrough
remote_runpod
remote_sambanova
remote_tgi
remote_together
remote_vllm
remote_watsonx
```

View file

@ -1,21 +0,0 @@
# remote::cerebras-openai-compat
## Description
Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Cerebras API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY}
```

View file

@ -1,21 +0,0 @@
# remote::fireworks-openai-compat
## Description
Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Fireworks API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY}
```

View file

@ -1,21 +0,0 @@
# remote::groq-openai-compat
## Description
Groq OpenAI-compatible provider for using Groq models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Groq API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY}
```

View file

@ -8,7 +8,7 @@ HuggingFace Inference Endpoints provider for dedicated model serving.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `endpoint_name` | `<class 'str'>` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
| `endpoint_name` | `<class 'str'>` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration

View file

@ -8,7 +8,7 @@ HuggingFace Inference API serverless provider for on-demand model inference.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `huggingface_repo` | `<class 'str'>` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
| `huggingface_repo` | `<class 'str'>` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) |
## Sample Configuration

View file

@ -8,7 +8,7 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `<class 'str'>` | No | PydanticUndefined | The URL for the TGI serving endpoint |
| `url` | `<class 'str'>` | No | | The URL for the TGI serving endpoint |
## Sample Configuration

View file

@ -1,21 +0,0 @@
# remote::together-openai-compat
## Description
Together AI OpenAI-compatible provider for using Together models with OpenAI API format.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `str \| None` | No | | The Together API key |
| `openai_compat_api_base` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together API server |
## Sample Configuration
```yaml
openai_compat_api_base: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
```

View file

@ -1,7 +1,15 @@
# Post_Training Providers
# Post_Training
## Overview
This section contains documentation for all available providers for the **post_training** API.
- [inline::huggingface](inline_huggingface.md)
- [inline::torchtune](inline_torchtune.md)
- [remote::nvidia](remote_nvidia.md)
## Providers
```{toctree}
:maxdepth: 1
inline_huggingface
inline_torchtune
remote_nvidia
```

View file

@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
| `weight_decay` | `<class 'float'>` | No | 0.01 | |
| `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
| `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
| `use_reference_model` | `<class 'bool'>` | No | True | |
| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
| `dpo_output_dir` | `<class 'str'>` | No | | |
## Sample Configuration
@ -31,6 +35,7 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
checkpoint_format: huggingface
distributed_backend: null
device: cpu
dpo_output_dir: ~/.llama/dummy/dpo_output
```

View file

@ -1,10 +1,18 @@
# Safety Providers
# Safety
## Overview
This section contains documentation for all available providers for the **safety** API.
- [inline::code-scanner](inline_code-scanner.md)
- [inline::llama-guard](inline_llama-guard.md)
- [inline::prompt-guard](inline_prompt-guard.md)
- [remote::bedrock](remote_bedrock.md)
- [remote::nvidia](remote_nvidia.md)
- [remote::sambanova](remote_sambanova.md)
## Providers
```{toctree}
:maxdepth: 1
inline_code-scanner
inline_llama-guard
inline_prompt-guard
remote_bedrock
remote_nvidia
remote_sambanova
```

View file

@ -1,7 +1,15 @@
# Scoring Providers
# Scoring
## Overview
This section contains documentation for all available providers for the **scoring** API.
- [inline::basic](inline_basic.md)
- [inline::braintrust](inline_braintrust.md)
- [inline::llm-as-judge](inline_llm-as-judge.md)
## Providers
```{toctree}
:maxdepth: 1
inline_basic
inline_braintrust
inline_llm-as-judge
```

View file

@ -1,5 +1,13 @@
# Telemetry Providers
# Telemetry
## Overview
This section contains documentation for all available providers for the **telemetry** API.
- [inline::meta-reference](inline_meta-reference.md)
## Providers
```{toctree}
:maxdepth: 1
inline_meta-reference
```

View file

@ -1,10 +1,18 @@
# Tool_Runtime Providers
# Tool_Runtime
## Overview
This section contains documentation for all available providers for the **tool_runtime** API.
- [inline::rag-runtime](inline_rag-runtime.md)
- [remote::bing-search](remote_bing-search.md)
- [remote::brave-search](remote_brave-search.md)
- [remote::model-context-protocol](remote_model-context-protocol.md)
- [remote::tavily-search](remote_tavily-search.md)
- [remote::wolfram-alpha](remote_wolfram-alpha.md)
## Providers
```{toctree}
:maxdepth: 1
inline_rag-runtime
remote_bing-search
remote_brave-search
remote_model-context-protocol
remote_tavily-search
remote_wolfram-alpha
```

View file

@ -1,16 +1,24 @@
# Vector_Io Providers
# Vector_Io
## Overview
This section contains documentation for all available providers for the **vector_io** API.
- [inline::chromadb](inline_chromadb.md)
- [inline::faiss](inline_faiss.md)
- [inline::meta-reference](inline_meta-reference.md)
- [inline::milvus](inline_milvus.md)
- [inline::qdrant](inline_qdrant.md)
- [inline::sqlite-vec](inline_sqlite-vec.md)
- [inline::sqlite_vec](inline_sqlite_vec.md)
- [remote::chromadb](remote_chromadb.md)
- [remote::milvus](remote_milvus.md)
- [remote::pgvector](remote_pgvector.md)
- [remote::qdrant](remote_qdrant.md)
- [remote::weaviate](remote_weaviate.md)
## Providers
```{toctree}
:maxdepth: 1
inline_chromadb
inline_faiss
inline_meta-reference
inline_milvus
inline_qdrant
inline_sqlite-vec
inline_sqlite_vec
remote_chromadb
remote_milvus
remote_pgvector
remote_qdrant
remote_weaviate
```

View file

@ -41,7 +41,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `db_path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
## Sample Configuration

View file

@ -10,7 +10,7 @@ Please refer to the remote provider documentation.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | |
| `db_path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |

View file

@ -50,12 +50,16 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `path` | `<class 'str'>` | No | PydanticUndefined | |
| `path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
```

View file

@ -205,7 +205,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | Path to the SQLite database file |
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
## Sample Configuration

View file

@ -10,7 +10,7 @@ Please refer to the sqlite-vec provider documentation.
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | PydanticUndefined | Path to the SQLite database file |
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
## Sample Configuration

View file

@ -40,7 +40,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | PydanticUndefined | |
| `url` | `str \| None` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
## Sample Configuration

View file

@ -111,8 +111,8 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `uri` | `<class 'str'>` | No | PydanticUndefined | The URI of the Milvus server |
| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server |
| `uri` | `<class 'str'>` | No | | The URI of the Milvus server |
| `token` | `str \| None` | No | | The token of the Milvus server |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |

View file

@ -20,11 +20,15 @@ Please refer to the inline provider documentation.
| `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
## Sample Configuration
```yaml
api_key: ${env.QDRANT_API_KEY}
api_key: ${env.QDRANT_API_KEY:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
```

View file

@ -33,9 +33,19 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate
See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
| `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration
```yaml
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db

View file

@ -366,7 +366,7 @@ The purpose of scoring function is to calculate the score for each example based
Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.
### Add new benchmark into template
Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/open-benchmark/run.yaml)
Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml)
Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
- `benchmark_id`: identifier of the benchmark
@ -378,7 +378,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar
Spin up llama stack server with 'open-benchmark' templates
```
llama stack run llama_stack/templates/open-benchmark/run.yaml
llama stack run llama_stack/distributions/open-benchmark/run.yaml
```

View file

@ -19,11 +19,11 @@ You have two ways to install Llama Stack:
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git
conda create -n myenv python=3.10
conda activate myenv
uv venv myenv --python 3.12
source myenv/bin/activate # On Windows: myenv\Scripts\activate
cd llama-stack
$CONDA_PREFIX/bin/pip install -e .
pip install -e .
## Downloading models via CLI

Some files were not shown because too many files have changed in this diff Show more