Repository: https://github.com/meta-llama/llama-stack.git
commit 31249a1a75 — Merge remote-tracking branch 'origin/main' into stack-config-default-embed
237 changed files with 30895 additions and 15441 deletions
.dockerignore (new file, +19)
@@ -0,0 +1,19 @@
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.so
+.git
+.gitignore
+htmlcov*
+.coverage
+coverage*
+.cache
+.mypy_cache
+.pytest_cache
+.ruff_cache
+uv.lock
+node_modules
+build
+/tmp
@@ -82,11 +82,14 @@ runs:
 echo "No recording changes"
 fi

-- name: Write inference logs to file
+- name: Write docker logs to file
 if: ${{ always() }}
 shell: bash
 run: |
 sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
+stack_container_name="llama-stack-test-$distro_name"
+sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true

 - name: Upload logs
 if: ${{ always() }}
@@ -57,7 +57,7 @@ runs:
 echo "Building Llama Stack"

 LLAMA_STACK_DIR=. \
-uv run --no-sync llama stack build --template ci-tests --image-type venv
+uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

 - name: Configure git for commits
 shell: bash
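Across this commit, `llama stack build --template <distro> --image-type venv` is repeatedly replaced by piping `llama stack list-deps` into `uv pip install`. A minimal sketch of that pattern, assuming a distribution named `ci-tests` and an already-activated uv environment:

```bash
# Inspect the dependency list first (one package spec per line), then install it.
uv run --no-sync llama stack list-deps ci-tests
uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install
```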
.github/workflows/README.md (1 change)
@@ -14,6 +14,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
+| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
.github/workflows/install-script-ci.yml (7 changes)
@@ -30,8 +30,11 @@ jobs:

 - name: Build a single provider
 run: |
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
-llama stack build --template starter --image-type container --image-name test
+docker build . \
+-f containers/Containerfile \
+--build-arg INSTALL_MODE=editable \
+--build-arg DISTRO_NAME=starter \
+--tag llama-stack:starter-ci

 - name: Run installer end-to-end
 run: |
.github/workflows/integration-auth-tests.yml (18 changes)
@@ -73,6 +73,24 @@ jobs:
 image_name: kube
 apis: []
 providers: {}
+storage:
+backends:
+kv_default:
+type: kv_sqlite
+db_path: $run_dir/kvstore.db
+sql_default:
+type: sql_sqlite
+db_path: $run_dir/sql_store.db
+stores:
+metadata:
+namespace: registry
+backend: kv_default
+inference:
+table_name: inference_store
+backend: sql_default
+conversations:
+table_name: openai_conversations
+backend: sql_default
 server:
 port: 8321
 EOF
.github/workflows/integration-tests.yml (4 changes)
@@ -47,7 +47,7 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-client-type: [library, server]
+client-type: [library, server, docker]
 # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
 client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -82,7 +82,7 @@ jobs:
 env:
 OPENAI_API_KEY: dummy
 with:
-stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
+stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
 setup: ${{ matrix.config.setup }}
 inference-mode: 'replay'
 suite: ${{ matrix.config.suite }}
@@ -144,7 +144,7 @@ jobs:

 - name: Build Llama Stack
 run: |
-uv run --no-sync llama stack build --template ci-tests --image-type venv
+uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

 - name: Check Storage and Memory Available Before Tests
 if: ${{ always() }}
.github/workflows/providers-build.yml (57 changes)
@@ -14,6 +14,8 @@ on:
 - '.github/workflows/providers-build.yml'
 - 'llama_stack/distributions/**'
 - 'pyproject.toml'
+- 'containers/Containerfile'
+- '.dockerignore'

 pull_request:
 paths:
@@ -24,6 +26,8 @@ on:
 - '.github/workflows/providers-build.yml'
 - 'llama_stack/distributions/**'
 - 'pyproject.toml'
+- 'containers/Containerfile'
+- '.dockerignore'

 concurrency:
 group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -60,15 +64,19 @@ jobs:
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

-- name: Print build dependencies
+- name: Install distribution into venv
+if: matrix.image-type == 'venv'
 run: |
-uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

-- name: Run Llama Stack Build
+- name: Build container image
+if: matrix.image-type == 'container'
 run: |
-# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
-# LLAMA_STACK_DIR is set to the current directory so we are building from the source
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
+docker build . \
+-f containers/Containerfile \
+--build-arg INSTALL_MODE=editable \
+--build-arg DISTRO_NAME=${{ matrix.distro }} \
+--tag llama-stack:${{ matrix.distro }}-ci

 - name: Print dependencies in the image
 if: matrix.image-type == 'venv'
@@ -86,8 +94,8 @@ jobs:

 - name: Build a single provider
 run: |
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama
+uv pip install -e .
+uv run --no-sync llama stack list-deps --providers inference=remote::ollama | xargs -L1 uv pip install
 build-custom-container-distribution:
 runs-on: ubuntu-latest
 steps:
@@ -97,11 +105,16 @@ jobs:
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

-- name: Build a single provider
+- name: Build container image
 run: |
-yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
-yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+docker build . \
+-f containers/Containerfile \
+--build-arg INSTALL_MODE=editable \
+--build-arg DISTRO_NAME=ci-tests \
+--build-arg BASE_IMAGE="$BASE_IMAGE" \
+--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+-t llama-stack:ci-tests

 - name: Inspect the container image entrypoint
 run: |
@@ -112,7 +125,7 @@ jobs:
 fi
 entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
 echo "Entrypoint: $entrypoint"
-if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
 echo "Entrypoint is not correct"
 exit 1
 fi
@@ -129,17 +142,19 @@ jobs:
 - name: Pin distribution to UBI9 base
 run: |
 yq -i '
-.image_type = "container" |
-.image_name = "ubi9-test" |
 .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
 ' llama_stack/distributions/ci-tests/build.yaml

-- name: Build dev container (UBI9)
-env:
-USE_COPY_NOT_MOUNT: "true"
-LLAMA_STACK_DIR: "."
+- name: Build UBI9 container image
 run: |
-uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+docker build . \
+-f containers/Containerfile \
+--build-arg INSTALL_MODE=editable \
+--build-arg DISTRO_NAME=ci-tests \
+--build-arg BASE_IMAGE="$BASE_IMAGE" \
+--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+-t llama-stack:ci-tests-ubi9

 - name: Inspect UBI9 image
 run: |
@@ -150,7 +165,7 @@ jobs:
 fi
 entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
 echo "Entrypoint: $entrypoint"
-if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
 echo "Entrypoint is not correct"
 exit 1
 fi
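The workflow above asserts that the image entrypoint is the new wrapper script rather than a hard-coded `llama stack run /app/run.yaml`. A quick local sketch of the same check, assuming an image built with the arguments shown in the diff and tagged `llama-stack:ci-tests`:

```bash
# Build the ci-tests image the same way the workflow does, then inspect its entrypoint.
docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=ci-tests \
  -t llama-stack:ci-tests

entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' llama-stack:ci-tests)
echo "Entrypoint: $entrypoint"   # expected: [/usr/local/bin/llama-stack-entrypoint.sh]
```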
.github/workflows/providers-list-deps.yml (new file, +105)
@@ -0,0 +1,105 @@
+name: Test llama stack list-deps
+
+run-name: Test llama stack list-deps
+
+on:
+push:
+branches:
+- main
+paths:
+- 'llama_stack/cli/stack/list_deps.py'
+- 'llama_stack/cli/stack/_list_deps.py'
+- 'llama_stack/core/build.*'
+- 'llama_stack/core/*.sh'
+- '.github/workflows/providers-list-deps.yml'
+- 'llama_stack/templates/**'
+- 'pyproject.toml'
+
+pull_request:
+paths:
+- 'llama_stack/cli/stack/list_deps.py'
+- 'llama_stack/cli/stack/_list_deps.py'
+- 'llama_stack/core/build.*'
+- 'llama_stack/core/*.sh'
+- '.github/workflows/providers-list-deps.yml'
+- 'llama_stack/templates/**'
+- 'pyproject.toml'
+
+concurrency:
+group: ${{ github.workflow }}-${{ github.ref }}
+cancel-in-progress: true
+
+jobs:
+generate-matrix:
+runs-on: ubuntu-latest
+outputs:
+distros: ${{ steps.set-matrix.outputs.distros }}
+steps:
+- name: Checkout repository
+uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- name: Generate Distribution List
+id: set-matrix
+run: |
+distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+echo "distros=$distros" >> "$GITHUB_OUTPUT"
+
+list-deps:
+needs: generate-matrix
+runs-on: ubuntu-latest
+strategy:
+matrix:
+distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
+image-type: [venv, container]
+fail-fast: false # We want to run all jobs even if some fail
+
+steps:
+- name: Checkout repository
+uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- name: Install dependencies
+uses: ./.github/actions/setup-runner
+
+- name: Print dependencies
+run: |
+uv run llama stack list-deps ${{ matrix.distro }}
+
+- name: Install Distro using llama stack list-deps
+run: |
+# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
+# LLAMA_STACK_DIR is set to the current directory so we are building from the source
+USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install
+
+- name: Print dependencies in the image
+if: matrix.image-type == 'venv'
+run: |
+uv pip list
+
+show-single-provider:
+runs-on: ubuntu-latest
+steps:
+- name: Checkout repository
+uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- name: Install dependencies
+uses: ./.github/actions/setup-runner
+
+- name: Show a single provider
+run: |
+USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps --providers inference=remote::ollama
+
+list-deps-from-config:
+runs-on: ubuntu-latest
+steps:
+- name: Checkout repository
+uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- name: Install dependencies
+uses: ./.github/actions/setup-runner
+
+- name: list-des from Config
+env:
+USE_COPY_NOT_MOUNT: "true"
+LLAMA_STACK_DIR: "."
+run: |
+uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
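The `generate-matrix` job above derives the distro matrix from the directory names under `llama_stack/distributions/`. A small sketch of that shell pipeline, run from a repository checkout; the sample output is illustrative:

```bash
# Each */build.yaml path is reduced to its parent directory name, then packed into a JSON array.
distros=$(ls llama_stack/distributions/*/*build.yaml \
  | awk -F'/' '{print $(NF-1)}' \
  | jq -R -s -c 'split("\n")[:-1]')
echo "$distros"   # e.g. ["ci-tests","starter",...]
```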
@@ -46,9 +46,9 @@ jobs:
 yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
 cat tests/external/ramalama-stack/run.yaml

-- name: Build distro from config file
+- name: Install distribution dependencies
 run: |
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
+uv run llama stack list-deps tests/external/ramalama-stack/build.yaml | xargs -L1 uv pip install

 - name: Start Llama Stack server in background
 if: ${{ matrix.image-type }} == 'venv'
.github/workflows/test-external.yml (7 changes)
@@ -44,11 +44,14 @@ jobs:

 - name: Print distro dependencies
 run: |
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
+uv run --no-sync llama stack list-deps tests/external/build.yaml

 - name: Build distro from config file
 run: |
-USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
+uv venv ci-test
+source ci-test/bin/activate
+uv pip install -e .
+LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install

 - name: Start Llama Stack server in background
 if: ${{ matrix.image-type }} == 'venv'
@@ -167,9 +167,9 @@ under the LICENSE file in the root directory of this source tree.
 Some tips about common tasks you work on while contributing to Llama Stack:

-### Using `llama stack build`
+### Installing dependencies of distributions

-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
+When installing dependencies for a distribution, you can use `llama stack list-deps` to view and install the required packages.

 Example:
 ```bash
@@ -177,7 +177,12 @@ cd work/
 git clone https://github.com/llamastack/llama-stack.git
 git clone https://github.com/llamastack/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+
+# Show dependencies for a distribution
+llama stack list-deps <distro-name>
+
+# Install dependencies
+llama stack list-deps <distro-name> | xargs -L1 uv pip install
 ```

 ### Updating distribution configurations
@@ -27,8 +27,11 @@ MODEL="Llama-4-Scout-17B-16E-Instruct"
 # get meta url from llama.com
 huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

+# install dependencies for the distribution
+llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
+
 # start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
+INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu

 # install client to interact with the server
 pip install llama-stack-client
@@ -89,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
 To try Llama Stack locally, run:

 ```bash
-curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
+curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
 ```

 ### Overview
@@ -98,21 +98,30 @@ data:
 - provider_id: model-context-protocol
 provider_type: remote::model-context-protocol
 config: {}
-metadata_store:
-type: postgres
+storage:
+backends:
+kv_default:
+type: kv_postgres
 host: ${env.POSTGRES_HOST:=localhost}
 port: ${env.POSTGRES_PORT:=5432}
 db: ${env.POSTGRES_DB:=llamastack}
 user: ${env.POSTGRES_USER:=llamastack}
 password: ${env.POSTGRES_PASSWORD:=llamastack}
-table_name: llamastack_kvstore
+table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
-inference_store:
-type: postgres
+sql_default:
+type: sql_postgres
 host: ${env.POSTGRES_HOST:=localhost}
 port: ${env.POSTGRES_PORT:=5432}
 db: ${env.POSTGRES_DB:=llamastack}
 user: ${env.POSTGRES_USER:=llamastack}
 password: ${env.POSTGRES_PASSWORD:=llamastack}
+references:
+metadata:
+backend: kv_default
+namespace: registry
+inference:
+backend: sql_default
+table_name: inference_store
 models:
 - metadata:
 embedding_dimension: 768
@@ -137,5 +146,4 @@ data:
 port: 8323
 kind: ConfigMap
 metadata:
-creationTimestamp: null
 name: llama-stack-config
@@ -95,21 +95,30 @@ providers:
 - provider_id: model-context-protocol
 provider_type: remote::model-context-protocol
 config: {}
-metadata_store:
-type: postgres
+storage:
+backends:
+kv_default:
+type: kv_postgres
 host: ${env.POSTGRES_HOST:=localhost}
 port: ${env.POSTGRES_PORT:=5432}
 db: ${env.POSTGRES_DB:=llamastack}
 user: ${env.POSTGRES_USER:=llamastack}
 password: ${env.POSTGRES_PASSWORD:=llamastack}
-table_name: llamastack_kvstore
+table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
-inference_store:
-type: postgres
+sql_default:
+type: sql_postgres
 host: ${env.POSTGRES_HOST:=localhost}
 port: ${env.POSTGRES_PORT:=5432}
 db: ${env.POSTGRES_DB:=llamastack}
 user: ${env.POSTGRES_USER:=llamastack}
 password: ${env.POSTGRES_PASSWORD:=llamastack}
+references:
+metadata:
+backend: kv_default
+namespace: registry
+inference:
+backend: sql_default
+table_name: inference_store
 models:
 - metadata:
 embedding_dimension: 768
containers/Containerfile (new file, +136)
@@ -0,0 +1,136 @@
+# syntax=docker/dockerfile:1.6
+#
+# This Dockerfile is used to build the Llama Stack container image.
+# Example:
+# docker build \
+# -f containers/Containerfile \
+# --build-arg DISTRO_NAME=starter \
+# --tag llama-stack:starter .
+
+ARG BASE_IMAGE=python:3.12-slim
+FROM ${BASE_IMAGE}
+
+ARG INSTALL_MODE="pypi"
+ARG LLAMA_STACK_DIR="/workspace"
+ARG LLAMA_STACK_CLIENT_DIR=""
+ARG PYPI_VERSION=""
+ARG TEST_PYPI_VERSION=""
+ARG KEEP_WORKSPACE=""
+ARG DISTRO_NAME="starter"
+ARG RUN_CONFIG_PATH=""
+ARG UV_HTTP_TIMEOUT=500
+ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+WORKDIR /app
+
+RUN set -eux; \
+if command -v dnf >/dev/null 2>&1; then \
+dnf -y update && \
+dnf install -y iputils git net-tools wget \
+vim-minimal python3.12 python3.12-pip python3.12-wheel \
+python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
+ln -sf /usr/bin/pip3.12 /usr/local/bin/pip && \
+ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
+dnf clean all; \
+elif command -v apt-get >/dev/null 2>&1; then \
+apt-get update && \
+apt-get install -y --no-install-recommends \
+iputils-ping net-tools iproute2 dnsutils telnet \
+curl wget git procps psmisc lsof traceroute bubblewrap \
+gcc g++ && \
+rm -rf /var/lib/apt/lists/*; \
+else \
+echo "Unsupported base image: expected dnf or apt-get" >&2; \
+exit 1; \
+fi
+
+RUN pip install --no-cache-dir uv
+ENV UV_SYSTEM_PYTHON=1
+
+ENV INSTALL_MODE=${INSTALL_MODE}
+ENV LLAMA_STACK_DIR=${LLAMA_STACK_DIR}
+ENV LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR}
+ENV PYPI_VERSION=${PYPI_VERSION}
+ENV TEST_PYPI_VERSION=${TEST_PYPI_VERSION}
+ENV KEEP_WORKSPACE=${KEEP_WORKSPACE}
+ENV DISTRO_NAME=${DISTRO_NAME}
+ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}
+
+# Copy the repository so editable installs and run configurations are available.
+COPY . /workspace
+
+# Install llama-stack
+RUN set -eux; \
+if [ "$INSTALL_MODE" = "editable" ]; then \
+if [ ! -d "$LLAMA_STACK_DIR" ]; then \
+echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
+exit 1; \
+fi; \
+uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
+elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
+uv pip install --no-cache-dir fastapi libcst; \
+if [ -n "$TEST_PYPI_VERSION" ]; then \
+uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
+else \
+uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
+fi; \
+else \
+if [ -n "$PYPI_VERSION" ]; then \
+uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
+else \
+uv pip install --no-cache-dir llama-stack; \
+fi; \
+fi;
+
+# Install the client package if it is provided
+RUN set -eux; \
+if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
+if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
+echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
+exit 1; \
+fi; \
+uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
+fi;
+
+# Install the dependencies for the distribution
+RUN set -eux; \
+if [ -z "$DISTRO_NAME" ]; then \
+echo "DISTRO_NAME must be provided" >&2; \
+exit 1; \
+fi; \
+deps="$(llama stack list-deps "$DISTRO_NAME")"; \
+if [ -n "$deps" ]; then \
+printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
+fi
+
+# Cleanup
+RUN set -eux; \
+pip uninstall -y uv; \
+should_remove=1; \
+if [ -n "$KEEP_WORKSPACE" ]; then should_remove=0; fi; \
+if [ "$INSTALL_MODE" = "editable" ]; then should_remove=0; fi; \
+case "$RUN_CONFIG_PATH" in \
+/workspace*) should_remove=0 ;; \
+esac; \
+if [ "$should_remove" -eq 1 ] && [ -d /workspace ]; then rm -rf /workspace; fi
+
+RUN cat <<'EOF' >/usr/local/bin/llama-stack-entrypoint.sh
+#!/bin/sh
+set -e
+
+if [ -n "$RUN_CONFIG_PATH" ] && [ -f "$RUN_CONFIG_PATH" ]; then
+exec llama stack run "$RUN_CONFIG_PATH" "$@"
+fi
+
+if [ -n "$DISTRO_NAME" ]; then
+exec llama stack run "$DISTRO_NAME" "$@"
+fi
+
+exec llama stack run "$@"
+EOF
+RUN chmod +x /usr/local/bin/llama-stack-entrypoint.sh
+
+RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
+
+ENTRYPOINT ["/usr/local/bin/llama-stack-entrypoint.sh"]
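A minimal sketch of using this Containerfile end to end, assuming it is run from the repository root; the image tag comes from the Containerfile's own example and the host port mapping to 8321 (the server port used elsewhere in this diff) is illustrative:

```bash
# Build an image for the starter distribution and start it;
# the entrypoint script then runs `llama stack run` with the baked-in distro.
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter

docker run --rm -p 8321:8321 llama-stack:starter
```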
@@ -51,8 +51,8 @@ device: cpu
 You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
-llama stack build --distro starter --image-type venv
-llama stack run ~/.llama/distributions/starter/starter-run.yaml
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### Usage Example
@@ -175,8 +175,7 @@ llama-stack-client benchmarks register \
 **1. Start the Llama Stack API Server**

 ```bash
-# Build and run a distribution (example: together)
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

@@ -209,7 +208,7 @@ The playground works with any Llama Stack distribution. Popular options include:
 <TabItem value="together" label="Together AI">

 ```bash
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

@@ -222,7 +221,7 @@ llama stack run together
 <TabItem value="ollama" label="Ollama (Local)">

 ```bash
-llama stack build --distro ollama --image-type venv
+llama stack list-deps ollama | xargs -L1 uv pip install
 llama stack run ollama
 ```

@@ -235,7 +234,7 @@ llama stack run ollama
 <TabItem value="meta-reference" label="Meta Reference">

 ```bash
-llama stack build --distro meta-reference --image-type venv
+llama stack list-deps meta-reference | xargs -L1 uv pip install
 llama stack run meta-reference
 ```
@@ -20,7 +20,8 @@ RAG enables your applications to reference and recall information from external
 In one terminal, start the Llama Stack server:

 ```bash
-uv run llama stack build --distro starter --image-type venv --run
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### 2. Connect with OpenAI Client
@@ -62,6 +62,10 @@ The new `/v2` API must be introduced alongside the existing `/v1` API and run in
 When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.

+### Deprecated APIs
+
+Deprecated APIs are those that are no longer actively maintained or supported. Depreated APIs are marked with the flag `deprecated = True` in the OpenAPI spec. These APIs will be removed in a future release.
+
 ### API Stability vs. Provider Stability

 The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.
@@ -158,17 +158,16 @@ under the LICENSE file in the root directory of this source tree.
 Some tips about common tasks you work on while contributing to Llama Stack:

-### Using `llama stack build`
+### Setup for development

-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
-
-Example:
 ```bash
-cd work/
 git clone https://github.com/meta-llama/llama-stack.git
-git clone https://github.com/meta-llama/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+uv run llama stack list-deps <distro-name> | xargs -L1 uv pip install
+
+# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package.
+git clone https://github.com/meta-llama/llama-stack-client-python.git
+uv add --editable ../llama-stack-client-python
 ```

 ### Updating distribution configurations
@@ -67,7 +67,7 @@ def get_base_url(self) -> str:

 ## Testing the Provider

-Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
+Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.

 ### 1. Integration Testing
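To make the testing note above concrete, here is a minimal sketch of the flow, assuming the integration suite lives under `tests/integration` (as referenced elsewhere in this diff); the exact pytest flags and test selection are assumptions to adapt to your setup:

```bash
# Install the together distribution's dependencies, then run the integration suite against it.
llama stack list-deps together | xargs -L1 uv pip install
uv run pytest tests/integration  # hypothetical invocation; adjust paths/flags as needed
```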
@@ -5,225 +5,79 @@ sidebar_label: Build your own Distribution
 sidebar_position: 3
 ---

-This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
+This guide walks you through inspecting existing distributions, customising their configuration, and building runnable artefacts for your own deployment.

+### Explore existing distributions
+
-### Setting your log level
+All first-party distributions live under `llama_stack/distributions/`. Each directory contains:

-In order to specify the proper logging level users can apply the following environment variable `LLAMA_STACK_LOGGING` with the following format:
+- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
+- `run.yaml` – sample run configuration (when provided).
+- Documentation fragments that power this site.

-`LLAMA_STACK_LOGGING=server=debug;core=info`
+Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.

-Where each category in the following list:
-
-- all
-- core
-- server
-- router
-- inference
-- agents
-- safety
-- eval
-- tools
-- client
-
-Can be set to any of the following log levels:
-
-- debug
-- info
-- warning
-- error
-- critical
-
-The default global log level is `info`. `all` sets the log level for all components.
-
-A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
-
-### Llama Stack Build
-
-In order to build your own distribution, we recommend you clone the `llama-stack` repository.
-
-```
-git clone git@github.com:meta-llama/llama-stack.git
-cd llama-stack
-pip install -e .
-```
-Use the CLI to build your distribution.
-The main points to consider are:
-1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
-2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
-3. **Config** - Do you want to use a pre-existing config file to build your distribution?
-
-```
-llama stack build -h
-usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only]
-[--run] [--providers PROVIDERS]
-
-Build a Llama stack container
-
-options:
--h, --help show this help message and exit
---config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to
-enter information interactively (default: None)
---template TEMPLATE (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default:
-None)
---distro DISTRIBUTION, --distribution DISTRIBUTION
-Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
---list-distros, --list-distributions
-Show the available distributions for building a Llama Stack distribution (default: False)
---image-type {container,venv}
-Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
---image-name IMAGE_NAME
-[for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default:
-None)
---print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
---run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
---providers PROVIDERS
-Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per
-API. (default: None)
-```
-
-After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
-
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

 <Tabs>
-<TabItem value="template" label="Building from a template">
-To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
+<TabItem value="container" label="Building a container">

-The following command will allow you to see the available templates and their corresponding providers.
-```
-llama stack build --list-templates
-```
+Use the Containerfile at `containers/Containerfile`, which installs `llama-stack`, resolves distribution dependencies via `llama stack list-deps`, and sets the entrypoint to `llama stack run`.

-```
-------------------------------+-----------------------------------------------------------------------------+
-| Template Name | Description |
-+------------------------------+-----------------------------------------------------------------------------+
-| watsonx | Use watsonx for running LLM inference |
-| vllm-gpu | Use a built-in vLLM engine for running LLM inference |
-| together | Use Together.AI for running LLM inference |
-| tgi | Use (an external) TGI server for running LLM inference |
-| starter | Quick start template for running Llama Stack with several popular providers |
-| sambanova | Use SambaNova for running LLM inference and safety |
-| remote-vllm | Use (an external) vLLM server for running LLM inference |
-| postgres-demo | Quick start template for running Llama Stack with several popular providers |
-| passthrough | Use Passthrough hosted llama-stack endpoint for LLM inference |
-| open-benchmark | Distribution for running open benchmarks |
-| ollama | Use (an external) Ollama server for running LLM inference |
-| nvidia | Use NVIDIA NIM for running LLM inference, evaluation and safety |
-| meta-reference-gpu | Use Meta Reference for running LLM inference |
-| llama_api | Distribution for running e2e tests in CI |
-| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
-| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
-| groq | Use Groq for running LLM inference |
-| fireworks | Use Fireworks.AI for running LLM inference |
-| experimental-post-training | Experimental template for post training |
-| dell | Dell's distribution of Llama Stack. TGI inference via Dell's custom container |
-| ci-tests | Distribution for running e2e tests in CI |
-| cerebras | Use Cerebras for running LLM inference |
-| bedrock | Use AWS Bedrock for running LLM inference and safety |
-+------------------------------+-----------------------------------------------------------------------------+
-```
-
-You may then pick a template to build your distribution with providers fitted to your liking.
-
-For example, to build a distribution with TGI as the inference provider, you can run:
-```
-$ llama stack build --distro starter
-...
-You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
-```
-
-```{tip}
-The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
-```
+```bash
+docker build . \
+-f containers/Containerfile \
+--build-arg DISTRO_NAME=starter \
+--tag llama-stack:starter
+```
+
+Handy build arguments:
+
+- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
+- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
+- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
+- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
+- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
+- `KEEP_WORKSPACE=1` – retain `/workspace` in the final image if you need to access additional files (such as sample configs or provider bundles).
+
+Make sure any custom `build.yaml`, run configs, or provider directories you reference are included in the Docker build context so the Containerfile can read them.
 </TabItem>
-<TabItem value="scratch" label="Building from Scratch">
+<TabItem value="external" label="Building with external providers">

-If the provided templates do not fit your use case, you could start off with running `llama stack build` which will allow you to a interactively enter wizard where you will be prompted to enter build configurations.
+External providers live outside the main repository but can be bundled by pointing `external_providers_dir` to a directory that contains your provider packages.

-It would be best to start with a template and understand the structure of the config file and the various concepts ( APIS, providers, resources, etc.) before starting from scratch.
-```
-llama stack build
-
-> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
-> Enter the image type you want your Llama Stack to be built as (container or venv): venv
-
-Llama Stack is composed of several APIs working together. Let's select
-the provider types (implementations) you want to use for these APIs.
-
-Tip: use <TAB> to see options for the providers.
-
-> Enter provider for API inference: inline::meta-reference
-> Enter provider for API safety: inline::llama-guard
-> Enter provider for API agents: inline::meta-reference
-> Enter provider for API memory: inline::faiss
-> Enter provider for API datasetio: inline::meta-reference
-> Enter provider for API scoring: inline::meta-reference
-> Enter provider for API eval: inline::meta-reference
-> Enter provider for API telemetry: inline::meta-reference
-
-> (Optional) Enter a short description for your Llama Stack:
-
-You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
-```
-</TabItem>
-<TabItem value="config" label="Building from a pre-existing build config file">
-- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
-
-- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
-
-```
-llama stack build --config llama_stack/distributions/starter/build.yaml
-```
-</TabItem>
-<TabItem value="external" label="Building with External Providers">
-
-Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
-
-To build a distribution with external providers, you need to:
-
-1. Configure the `external_providers_dir` in your build configuration file:
+1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
+2. Update `build.yaml` with the directory and provider entries.
+3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
+
+Example `build.yaml` excerpt for a custom Ollama provider:

 ```yaml
-# Example my-external-stack.yaml with external providers
-version: '2'
 distribution_spec:
-description: Custom distro for CI tests
 providers:
 inference:
 - remote::custom_ollama
-# Add more providers as needed
-image_type: container
-image_name: ci-test
-# Path to external provider implementations
-external_providers_dir: ~/.llama/providers.d
+external_providers_dir: /workspace/providers.d
+```
+
+Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:
+
+```python
+from llama_stack.providers.datatypes import ProviderSpec
+
+
+def get_provider_spec() -> ProviderSpec:
+    return ProviderSpec(
+        provider_type="remote::custom_ollama",
+        module="llama_stack_ollama_provider",
+        config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
+        pip_packages=[
+            "ollama",
+            "aiohttp",
+            "llama-stack-provider-ollama",
+        ],
+    )
 ```

 Here's an example for a custom Ollama provider:
@ -245,53 +99,22 @@ The `pip_packages` section lists the Python packages required by the provider, a
|
||||||
provider package itself. The package must be available on PyPI or can be provided from a local
|
provider package itself. The package must be available on PyPI or can be provided from a local
|
||||||
directory or a git repository (git must be installed on the build environment).
|
directory or a git repository (git must be installed on the build environment).
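As a hedged illustration, the provider package can be installed from any source pip understands; the package name, local path, and git URL below are purely illustrative:

```bash
# Illustrative ways to make the provider package available (names and URL are assumptions)
uv pip install llama-stack-provider-ollama                    # from PyPI
uv pip install ./llama-stack-provider-ollama                  # from a local directory
uv pip install "git+https://github.com/example/llama-stack-provider-ollama.git"  # from a git repository
```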
|
||||||
|
|
||||||
2. Build your distribution using the config file:
|
For deeper guidance, see the [External Providers documentation](../providers/external/).
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --config my-external-stack.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external/).
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="container" label="Building Container">
|
</Tabs>
|
||||||
|
|
||||||
:::tip Podman Alternative
|
### Run your stack server
|
||||||
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
|
|
||||||
:::
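For example, to have the build use Podman instead of Docker:

```bash
# Select Podman as the container binary for the build
export CONTAINER_BINARY=podman
```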
|
|
||||||
|
|
||||||
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
After building the image, launch it directly with Docker or Podman—the entrypoint calls `llama stack run` using the baked distribution or the bundled run config:
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --distro starter --image-type container
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
$ llama stack build --distro starter --image-type container
|
|
||||||
...
|
|
||||||
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/Containerfile
FROM python:3.10-slim
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
|
|
||||||
```
|
|
||||||
|
|
||||||
Now set some environment variables for the inference model ID and the Llama Stack port, and create a local directory to mount into the container's file system.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export INFERENCE_MODEL="llama3.2:3b"
|
|
||||||
export LLAMA_STACK_PORT=8321
|
|
||||||
mkdir -p ~/.llama
|
|
||||||
```
|
|
||||||
|
|
||||||
After this step succeeds, you should be able to find the built container image and test it with the Docker command below:
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run -d \
|
docker run -d \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v ~/.llama:/root/.llama \
|
-v ~/.llama:/root/.llama \
|
||||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
-e OLLAMA_URL=http://host.docker.internal:11434 \
|
-e OLLAMA_URL=http://host.docker.internal:11434 \
|
||||||
localhost/distribution-ollama:dev \
|
llama-stack:starter \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -311,131 +134,14 @@ Here are the docker flags and their uses:
|
||||||
|
|
||||||
* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
|
* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
### Running your Stack server
|
If you prepared a custom run config, mount it into the container and reference it explicitly:
|
||||||
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file that was written out at the end of the `llama stack build` step.
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run \
|
||||||
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
|
-v $(pwd)/run.yaml:/app/run.yaml \
|
||||||
|
llama-stack:starter \
|
||||||
|
/app/run.yaml
|
||||||
```
|
```
|
||||||
llama stack run -h
|
|
||||||
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
|
|
||||||
[--image-type {venv}] [--enable-ui]
|
|
||||||
[config | distro]
|
|
||||||
|
|
||||||
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
|
|
||||||
--image-name IMAGE_NAME
|
|
||||||
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
|
|
||||||
--image-type {venv}
|
|
||||||
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
|
|
||||||
--enable-ui Start the UI server (default: False)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.
|
|
||||||
|
|
||||||
```
|
|
||||||
# Start using template name
|
|
||||||
llama stack run tgi
|
|
||||||
|
|
||||||
# Start using config file
|
|
||||||
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
|
|
||||||
Serving API inspect
|
|
||||||
GET /health
|
|
||||||
GET /providers/list
|
|
||||||
GET /routes/list
|
|
||||||
Serving API inference
|
|
||||||
POST /inference/chat_completion
|
|
||||||
POST /inference/completion
|
|
||||||
POST /inference/embeddings
|
|
||||||
...
|
|
||||||
Serving API agents
|
|
||||||
POST /agents/create
|
|
||||||
POST /agents/session/create
|
|
||||||
POST /agents/turn/create
|
|
||||||
POST /agents/delete
|
|
||||||
POST /agents/session/delete
|
|
||||||
POST /agents/session/get
|
|
||||||
POST /agents/step/get
|
|
||||||
POST /agents/turn/get
|
|
||||||
|
|
||||||
Listening on ['::', '0.0.0.0']:8321
|
|
||||||
INFO: Started server process [2935911]
|
|
||||||
INFO: Waiting for application startup.
|
|
||||||
INFO: Application startup complete.
|
|
||||||
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
|
|
||||||
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
|
|
||||||
```
|
|
||||||
|
|
||||||
### Listing Distributions
|
|
||||||
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack list -h
|
|
||||||
usage: llama stack list [-h]
|
|
||||||
|
|
||||||
list the build stacks
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
```
|
|
||||||
|
|
||||||
Example Usage
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack list
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
| Stack Name | Path | Build Config | Run Config |
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
| together | ~/.llama/distributions/together | Yes | No |
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
| bedrock | ~/.llama/distributions/bedrock | Yes | No |
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
| starter | ~/.llama/distributions/starter | Yes | Yes |
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
| remote-vllm | ~/.llama/distributions/remote-vllm | Yes | Yes |
|
|
||||||
+------------------------------+-----------------------------------------------------------------+--------------+------------+
|
|
||||||
```
|
|
||||||
|
|
||||||
### Removing a Distribution
|
|
||||||
Use the remove command to delete a distribution you've previously built.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack rm -h
|
|
||||||
usage: llama stack rm [-h] [--all] [name]
|
|
||||||
|
|
||||||
Remove the build stack
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
name Name of the stack to delete (default: None)
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--all, -a Delete all stacks (use with caution) (default: False)
|
|
||||||
```
|
|
||||||
|
|
||||||
Example
|
|
||||||
```
|
|
||||||
llama stack rm llamastack-test
|
|
||||||
```
|
|
||||||
|
|
||||||
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.
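For example, a quick cleanup pass might look like this (the stack name is the one from the example above):

```bash
# Review existing stacks, then delete one that is no longer needed
llama stack list
llama stack rm llamastack-test
```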
|
|
||||||
|
|
||||||
### Troubleshooting
|
|
||||||
|
|
||||||
If you encounter any issues, ask questions in our Discord, search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file a new issue.
|
|
||||||
|
|
|
||||||
|
|
@ -44,18 +44,32 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config:
|
config:
|
||||||
persistence_store:
|
persistence:
|
||||||
type: sqlite
|
agent_state:
|
||||||
namespace: null
|
backend: kv_default
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
|
namespace: agents
|
||||||
|
responses:
|
||||||
|
backend: sql_default
|
||||||
|
table_name: responses
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
metadata_store:
|
storage:
|
||||||
namespace: null
|
backends:
|
||||||
type: sqlite
|
kv_default:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
|
type: kv_sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
|
||||||
|
sql_default:
|
||||||
|
type: sql_sqlite
|
||||||
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
|
||||||
|
references:
|
||||||
|
metadata:
|
||||||
|
backend: kv_default
|
||||||
|
namespace: registry
|
||||||
|
inference:
|
||||||
|
backend: sql_default
|
||||||
|
table_name: inference_store
|
||||||
models:
|
models:
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
model_id: ${env.INFERENCE_MODEL}
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.
|
||||||
```bash
|
```bash
|
||||||
# setup
|
# setup
|
||||||
uv pip install llama-stack
|
uv pip install llama-stack
|
||||||
llama stack build --distro starter --image-type venv
|
llama stack list-deps starter | xargs -L1 uv pip install
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
|
||||||
|
|
@ -1,56 +1,155 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
|
stack_run_config.yaml: |
|
||||||
inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
|
version: '2'
|
||||||
\ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
|
image_name: kubernetes-demo
|
||||||
\ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
|
apis:
|
||||||
${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
|
- agents
|
||||||
${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
|
- inference
|
||||||
remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
|
- files
|
||||||
\ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
|
- safety
|
||||||
\ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
|
- telemetry
|
||||||
\ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
|
- tool_runtime
|
||||||
\ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
|
- vector_io
|
||||||
\ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
|
providers:
|
||||||
\ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
|
inference:
|
||||||
\ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
|
- provider_id: vllm-inference
|
||||||
\ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
|
provider_type: remote::vllm
|
||||||
meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
|
config:
|
||||||
${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
|
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
||||||
\ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||||
\ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||||
\ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||||
\ provider_type: inline::meta-reference\n config:\n persistence_store:\n
|
- provider_id: vllm-safety
|
||||||
\ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
|
provider_type: remote::vllm
|
||||||
${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
|
config:
|
||||||
${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
|
||||||
\ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
|
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||||
\ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
|
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||||
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||||
\ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
|
- provider_id: sentence-transformers
|
||||||
\ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
|
provider_type: inline::sentence-transformers
|
||||||
${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
|
config: {}
|
||||||
\ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
|
vector_io:
|
||||||
\ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
|
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
||||||
\ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
|
provider_type: remote::chromadb
|
||||||
3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
|
config:
|
||||||
{}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
|
url: ${env.CHROMADB_URL:=}
|
||||||
\ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
|
kvstore:
|
||||||
\ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
|
type: postgres
|
||||||
${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
\ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host:
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id:
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n
|
files:
|
||||||
\ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id:
|
- provider_id: meta-reference-files
|
||||||
${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n
|
provider_type: inline::localfs
|
||||||
\ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
|
config:
|
||||||
[]\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
|
||||||
builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
|
metadata_store:
|
||||||
\ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n
|
type: sqlite
|
||||||
\ type: github_token\n"
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
|
||||||
|
safety:
|
||||||
|
- provider_id: llama-guard
|
||||||
|
provider_type: inline::llama-guard
|
||||||
|
config:
|
||||||
|
excluded_categories: []
|
||||||
|
agents:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config:
|
||||||
|
persistence_store:
|
||||||
|
type: postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
responses_store:
|
||||||
|
type: postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
telemetry:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config:
|
||||||
|
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||||
|
sinks: ${env.TELEMETRY_SINKS:=console}
|
||||||
|
tool_runtime:
|
||||||
|
- provider_id: brave-search
|
||||||
|
provider_type: remote::brave-search
|
||||||
|
config:
|
||||||
|
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
|
||||||
|
max_results: 3
|
||||||
|
- provider_id: tavily-search
|
||||||
|
provider_type: remote::tavily-search
|
||||||
|
config:
|
||||||
|
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
|
||||||
|
max_results: 3
|
||||||
|
- provider_id: rag-runtime
|
||||||
|
provider_type: inline::rag-runtime
|
||||||
|
config: {}
|
||||||
|
- provider_id: model-context-protocol
|
||||||
|
provider_type: remote::model-context-protocol
|
||||||
|
config: {}
|
||||||
|
storage:
|
||||||
|
backends:
|
||||||
|
kv_default:
|
||||||
|
type: kv_postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||||
|
sql_default:
|
||||||
|
type: sql_postgres
|
||||||
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
references:
|
||||||
|
metadata:
|
||||||
|
backend: kv_default
|
||||||
|
namespace: registry
|
||||||
|
inference:
|
||||||
|
backend: sql_default
|
||||||
|
table_name: inference_store
|
||||||
|
models:
|
||||||
|
- metadata:
|
||||||
|
embedding_dimension: 768
|
||||||
|
model_id: nomic-embed-text-v1.5
|
||||||
|
provider_id: sentence-transformers
|
||||||
|
model_type: embedding
|
||||||
|
- metadata: {}
|
||||||
|
model_id: ${env.INFERENCE_MODEL}
|
||||||
|
provider_id: vllm-inference
|
||||||
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
|
||||||
|
provider_id: vllm-safety
|
||||||
|
model_type: llm
|
||||||
|
shields:
|
||||||
|
- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
|
||||||
|
vector_dbs: []
|
||||||
|
datasets: []
|
||||||
|
scoring_fns: []
|
||||||
|
benchmarks: []
|
||||||
|
tool_groups:
|
||||||
|
- toolgroup_id: builtin::websearch
|
||||||
|
provider_id: tavily-search
|
||||||
|
- toolgroup_id: builtin::rag
|
||||||
|
provider_id: rag-runtime
|
||||||
|
server:
|
||||||
|
port: 8321
|
||||||
|
auth:
|
||||||
|
provider_config:
|
||||||
|
type: github_token
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
creationTimestamp: null
|
|
||||||
name: llama-stack-config
|
name: llama-stack-config
|
||||||
|
|
|
||||||
|
|
@ -93,21 +93,30 @@ providers:
|
||||||
- provider_id: model-context-protocol
|
- provider_id: model-context-protocol
|
||||||
provider_type: remote::model-context-protocol
|
provider_type: remote::model-context-protocol
|
||||||
config: {}
|
config: {}
|
||||||
metadata_store:
|
storage:
|
||||||
type: postgres
|
backends:
|
||||||
|
kv_default:
|
||||||
|
type: kv_postgres
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
table_name: llamastack_kvstore
|
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||||
inference_store:
|
sql_default:
|
||||||
type: postgres
|
type: sql_postgres
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
db: ${env.POSTGRES_DB:=llamastack}
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
|
references:
|
||||||
|
metadata:
|
||||||
|
backend: kv_default
|
||||||
|
namespace: registry
|
||||||
|
inference:
|
||||||
|
backend: sql_default
|
||||||
|
table_name: inference_store
|
||||||
models:
|
models:
|
||||||
- metadata:
|
- metadata:
|
||||||
embedding_dimension: 768
|
embedding_dimension: 768
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
|
||||||
uv venv starter --python 3.12
|
uv venv starter --python 3.12
|
||||||
source starter/bin/activate # On Windows: starter\Scripts\activate
|
source starter/bin/activate # On Windows: starter\Scripts\activate
|
||||||
pip install --no-cache llama-stack==0.2.2
|
pip install --no-cache llama-stack==0.2.2
|
||||||
llama stack build --distro starter --image-type venv
|
llama stack list-deps starter | xargs -L1 uv pip install
|
||||||
export FIREWORKS_API_KEY=<SOME_KEY>
|
export FIREWORKS_API_KEY=<SOME_KEY>
|
||||||
llama stack run starter --port 5050
|
llama stack run starter --port 5050
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -166,10 +166,10 @@ docker run \
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
||||||
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
|
Install the distribution dependencies before launching:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --distro dell --image-type venv
|
llama stack list-deps dell | xargs -L1 uv pip install
|
||||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
DEH_URL=$DEH_URL \
|
DEH_URL=$DEH_URL \
|
||||||
CHROMA_URL=$CHROMA_URL \
|
CHROMA_URL=$CHROMA_URL \
|
||||||
|
|
|
||||||
|
|
@ -81,10 +81,10 @@ docker run \
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
||||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
Make sure you have the Llama Stack CLI available.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --distro meta-reference-gpu --image-type venv
|
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
|
||||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||||
llama stack run distributions/meta-reference-gpu/run.yaml \
|
llama stack run distributions/meta-reference-gpu/run.yaml \
|
||||||
--port 8321
|
--port 8321
|
||||||
|
|
|
||||||
|
|
@ -136,11 +136,11 @@ docker run \
|
||||||
|
|
||||||
### Via venv
|
### Via venv
|
||||||
|
|
||||||
If you've set up your local development environment, you can also build the image using your local virtual environment.
|
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||||
llama stack build --distro nvidia --image-type venv
|
llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||||
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
llama stack run ./run.yaml \
|
llama stack run ./run.yaml \
|
||||||
|
|
|
||||||
|
|
@ -169,7 +169,11 @@ docker run \
|
||||||
Ensure you have configured the starter distribution using the environment variables explained above.
|
Ensure you have configured the starter distribution using the environment variables explained above.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
|
# Install dependencies for the starter distribution
|
||||||
|
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||||
|
|
||||||
|
# Run the server
|
||||||
|
uv run --with llama-stack llama stack run starter
|
||||||
```
|
```
|
||||||
|
|
||||||
## Example Usage
|
## Example Usage
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,17 @@ Another simple way to start interacting with Llama Stack is to just spin up a co
|
||||||
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.
|
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.
|
||||||
|
|
||||||
|
|
||||||
|
## Configure logging
|
||||||
|
|
||||||
|
Control log output via environment variables before starting the server.
|
||||||
|
|
||||||
|
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
|
||||||
|
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
|
||||||
|
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
|
||||||
|
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
|
||||||
|
|
||||||
|
Export these variables prior to running `llama stack run`, launching a container, or starting the server through any other pathway.
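For example, a minimal sketch using the variables above (the log path is illustrative):

```bash
# Verbose server logs, info-level core logs, mirrored to a file
export LLAMA_STACK_LOGGING="server=debug;core=info"
export LLAMA_STACK_LOG_FILE=~/.llama/logs/server.log
llama stack run starter
```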
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
:hidden:
|
:hidden:
|
||||||
|
|
|
||||||
|
|
@ -58,15 +58,19 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="venv" label="Using venv">
|
<TabItem value="venv" label="Using venv">
|
||||||
You can use Python to build and run the Llama Stack server, which is useful for testing and development.
|
You can use Python to install dependencies and run the Llama Stack server, which is useful for testing and development.
|
||||||
|
|
||||||
Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
|
Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
|
||||||
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
|
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
|
||||||
Now let's build and run the Llama Stack config for Ollama.
|
Now let's install dependencies and run the Llama Stack config for Ollama.
|
||||||
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.
|
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.
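For example, assuming Ollama is running locally on its default port:

```bash
# Enable the Ollama provider by pointing the stack at a running Ollama server
export OLLAMA_URL=http://localhost:11434
```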
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --distro starter --image-type venv --run
|
# Install dependencies for the starter distribution
|
||||||
|
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||||
|
|
||||||
|
# Run the server
|
||||||
|
llama stack run starter
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="container" label="Using a Container">
|
<TabItem value="container" label="Using a Container">
|
||||||
|
|
@ -304,7 +308,7 @@ stream = agent.create_turn(
|
||||||
for event in AgentEventLogger().log(stream):
|
for event in AgentEventLogger().log(stream):
|
||||||
event.print()
|
event.print()
|
||||||
```
|
```
|
||||||
### ii. Run the Script
|
#### ii. Run the Script
|
||||||
Let's run the script using `uv`
|
Let's run the script using `uv`
|
||||||
```bash
|
```bash
|
||||||
uv run python agent.py
|
uv run python agent.py
|
||||||
|
|
|
||||||
|
|
@ -24,10 +24,13 @@ ollama run llama3.2:3b --keepalive 60m
|
||||||
|
|
||||||
#### Step 2: Run the Llama Stack server
|
#### Step 2: Run the Llama Stack server
|
||||||
|
|
||||||
We will use `uv` to run the Llama Stack server.
|
We will use `uv` to install dependencies and run the Llama Stack server.
|
||||||
```bash
|
```bash
|
||||||
OLLAMA_URL=http://localhost:11434 \
|
# Install dependencies for the starter distribution
|
||||||
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
|
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||||
|
|
||||||
|
# Run the server
|
||||||
|
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
|
||||||
```
|
```
|
||||||
#### Step 3: Run the demo
|
#### Step 3: Run the demo
|
||||||
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
||||||
|
|
|
||||||
|
|
@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No | | |
|
||||||
| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
|
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
persistence_store:
|
persistence:
|
||||||
type: sqlite
|
agent_state:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
|
namespace: agents
|
||||||
responses_store:
|
backend: kv_default
|
||||||
type: sqlite
|
responses:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db
|
table_name: responses
|
||||||
|
backend: sql_default
|
||||||
|
max_write_queue_size: 10000
|
||||||
|
num_writers: 4
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence.
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. |
|
| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Configuration for the key-value store backend. |
|
||||||
| `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
|
| `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
|
||||||
| `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |
|
| `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |
|
||||||
|
|
||||||
|
|
@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
namespace: batches
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
namespace: datasetio::localfs
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
namespace: datasetio::huggingface
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
---
|
---
|
||||||
description: "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
description: "Evaluations
|
||||||
|
|
||||||
|
Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||||
sidebar_label: Eval
|
sidebar_label: Eval
|
||||||
title: Eval
|
title: Eval
|
||||||
---
|
---
|
||||||
|
|
@ -8,6 +10,8 @@ title: Eval
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
|
Evaluations
|
||||||
|
|
||||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||||
|
|
||||||
This section contains documentation for all available providers for the **eval** API.
|
This section contains documentation for all available providers for the **eval** API.
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
kvstore:
|
||||||
type: sqlite
|
namespace: eval
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -240,6 +240,6 @@ additional_pip_packages:
|
||||||
- sqlalchemy[asyncio]
|
- sqlalchemy[asyncio]
|
||||||
```
|
```
|
||||||
|
|
||||||
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
|
No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.
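Concretely, a hedged sketch (the distribution name is illustrative):

```bash
# Install the distribution's dependencies, then start the server
llama stack list-deps my-distro | xargs -L1 uv pip install
llama stack run my-distro
```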
|
||||||
|
|
||||||
The provider will now be available in Llama Stack with the type `remote::ramalama`.
|
The provider will now be available in Llama Stack with the type `remote::ramalama`.
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
|
| `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
|
||||||
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
|
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
|
||||||
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |
|
| `ttl_secs` | `<class 'int'>` | No | 31536000 | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo
|
||||||
```yaml
|
```yaml
|
||||||
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
|
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
type: sqlite
|
table_name: files_metadata
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db
|
backend: sql_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad
|
||||||
| `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
|
| `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
|
||||||
| `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
|
| `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
|
||||||
| `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
|
| `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
|
||||||
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata |
|
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
|
|
@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
|
||||||
endpoint_url: ${env.S3_ENDPOINT_URL:=}
|
endpoint_url: ${env.S3_ENDPOINT_URL:=}
|
||||||
auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
|
auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
type: sqlite
|
table_name: s3_files_metadata
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db
|
backend: sql_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `db_path` | `<class 'str'>` | No | | |
|
| `db_path` | `<class 'str'>` | No | | |
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
db_path: ${env.CHROMADB_PATH}
|
db_path: ${env.CHROMADB_PATH}
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::chroma
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -95,12 +95,12 @@ more details about Faiss in general.
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::faiss
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,14 +14,14 @@ Meta's reference implementation of a vector database.
|
||||||
|
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::faiss
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
## Deprecation Notice
|
## Deprecation Notice
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,14 +17,14 @@ Please refer to the remote provider documentation.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `db_path` | `<class 'str'>` | No | | |
|
| `db_path` | `<class 'str'>` | No | | |
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
|
||||||
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
|
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
|
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::milvus
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `path` | `<class 'str'>` | No | | |
|
| `path` | `<class 'str'>` | No | | |
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
|
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::qdrant
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::sqlite_vec
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation.
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
|
||||||
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) |
|
| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
|
||||||
|
|
||||||
## Sample Configuration
|
## Sample Configuration
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
|
||||||
kvstore:
|
persistence:
|
||||||
type: sqlite
|
namespace: vector_io::sqlite_vec
|
||||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db
|
backend: kv_default
|
||||||
```
|
```
|
||||||
## Deprecation Notice
|
## Deprecation Notice
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
|
||||||
| Field | Type | Required | Default | Description |
|
| Field | Type | Required | Default | Description |
|
||||||
|-------|------|----------|---------|-------------|
|
|-------|------|----------|---------|-------------|
|
||||||
| `url` | `str \| None` | No | | |
|
| `url` | `str \| None` | No | | |
|
||||||
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |

 ## Sample Configuration

 ```yaml
 url: ${env.CHROMADB_URL}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db
+persistence:
+  namespace: vector_io::chroma_remote
+  backend: kv_default
 ```

@@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
 | `uri` | `<class 'str'>` | No | | The URI of the Milvus server |
 | `token` | `str \| None` | No | | The token of the Milvus server |
 | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
 | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |

 :::note

@@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo
 ```yaml
 uri: ${env.MILVUS_ENDPOINT}
 token: ${env.MILVUS_TOKEN}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db
+persistence:
+  namespace: vector_io::milvus_remote
+  backend: kv_default
 ```

@@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 | `db` | `str \| None` | No | postgres | |
 | `user` | `str \| None` | No | postgres | |
 | `password` | `str \| None` | No | mysecretpassword | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

@@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432}
 db: ${env.PGVECTOR_DB}
 user: ${env.PGVECTOR_USER}
 password: ${env.PGVECTOR_PASSWORD}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db
+persistence:
+  namespace: vector_io::pgvector
+  backend: kv_default
 ```

@@ -26,13 +26,13 @@ Please refer to the inline provider documentation.
 | `prefix` | `str \| None` | No | | |
 | `timeout` | `int \| None` | No | | |
 | `host` | `str \| None` | No | | |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | |
+| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |

 ## Sample Configuration

 ```yaml
 api_key: ${env.QDRANT_API_KEY:=}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db
+persistence:
+  namespace: vector_io::qdrant_remote
+  backend: kv_default
 ```

@@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
 |-------|------|----------|---------|-------------|
 | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
 | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
-| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) |
+| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |

 ## Sample Configuration

 ```yaml
 weaviate_api_key: null
 weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
-kvstore:
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db
+persistence:
+  namespace: vector_io::weaviate
+  backend: kv_default
 ```
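
Note: each `persistence` entry above points at a named key-value backend instead of carrying an inline SQLite config. A minimal sketch of the `storage` section such a reference assumes (the `kv_default` name and the SQLite path mirror the generated run config later in this diff; treat the exact path as illustrative):

```yaml
# Top-level storage section (names illustrative); provider `persistence` blocks
# refer to a backend defined here by name, e.g. `backend: kv_default`.
storage:
  backends:
    kv_default:
      type: sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
```
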
@@ -123,7 +123,8 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-"!uv run --with llama-stack llama stack build --distro together\n",
+"!uv run --with llama-stack llama stack list-deps together | xargs -L1 uv pip install\n",
+"!uv run --with llama-stack llama stack run together\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",

@@ -233,7 +233,8 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
+"!uv run --with llama-stack llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install\n",
+"!uv run --with llama-stack llama stack run meta-reference-gpu\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",

@@ -223,7 +223,8 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro llama_api\n",
+"!uv run --with llama-stack llama stack list-deps llama_api | xargs -L1 uv pip install\n",
+"!uv run --with llama-stack llama stack run llama_api\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",

@@ -2864,7 +2864,7 @@
 }
 ],
 "source": [
-"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
+"!llama stack list-deps experimental-post-training | xargs -L1 uv pip install"
 ]
 },
 {

@@ -38,7 +38,7 @@
 "source": [
 "# NBVAL_SKIP\n",
 "!pip install -U llama-stack\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
+"llama stack list-deps fireworks | xargs -L1 uv pip install\n"
 ]
 },
 {

@@ -57,7 +57,7 @@
 "outputs": [],
 "source": [
 "# NBVAL_SKIP\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
+"!uv run llama stack list-deps together | xargs -L1 uv pip install\n"
 ]
 },
 {

@@ -136,7 +136,8 @@
 " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
+" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
+" \"uv run --with llama-stack llama stack run starter\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",

@@ -172,7 +173,7 @@
 "\n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes using pkill command\n",
-" os.system(\"pkill -f llama_stack.core.server.server\")"
+" os.system(\"pkill -f llama_stack.core.server.server\")\n"
 ]
 },
 {

@@ -105,7 +105,8 @@
 " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
+" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
+" \"uv run --with llama-stack llama stack run starter\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",

@@ -92,7 +92,7 @@
 "metadata": {},
 "source": [
 "```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
+"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
 "```"
 ]
 },

@@ -81,7 +81,7 @@
 "metadata": {},
 "source": [
 "```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
+"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
 "```"
 ]
 },

@@ -145,7 +145,7 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-"!uv run --with llama-stack llama stack build --distro starter\n",
+"!uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",

@@ -47,11 +47,11 @@ function QuickStart() {
 <pre><code>{`# Install uv and start Ollama
 ollama run llama3.2:3b --keepalive 60m

+# Install server dependencies
+uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
+
 # Run Llama Stack server
-OLLAMA_URL=http://localhost:11434 \\
-uv run --with llama-stack \\
-llama stack build --distro starter \\
---image-type venv --run
+OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter

 # Try the Python SDK
 from llama_stack_client import LlamaStackClient
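
The notebook and docs edits above all follow the same substitution: the former one-shot `llama stack build --distro <name>` becomes an explicit dependency install followed by `llama stack run`. A sketch of the pattern as it appears in these examples (distribution name and environment variables vary per notebook):

```bash
# Install the server dependencies for a distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Start the server for that distribution
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
```
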
docs/static/deprecated-llama-stack-spec.html (vendored, 12 changes)
@@ -9024,6 +9024,10 @@
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
 },
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
+},
 "input": {
 "type": "array",
 "items": {
@@ -9901,6 +9905,10 @@
 "usage": {
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
+},
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
 }
 },
 "additionalProperties": false,
@@ -13449,8 +13457,8 @@
 },
 {
 "name": "Eval",
-"description": "",
-"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
+"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
+"x-displayName": "Evaluations"
 },
 {
 "name": "Files",
docs/static/deprecated-llama-stack-spec.yaml (vendored, 12 changes)
@@ -6734,6 +6734,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 input:
 type: array
 items:
@@ -7403,6 +7407,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 additionalProperties: false
 required:
 - created_at
@@ -10196,9 +10204,9 @@ tags:
 - name: Datasets
 description: ''
 - name: Eval
-description: ''
-x-displayName: >-
+description: >-
 Llama Stack Evaluation API for running evaluations on model and agent candidates.
+x-displayName: Evaluations
 - name: Files
 description: >-
 This API is used to upload documents that can be used with other Llama Stack
@@ -5518,8 +5518,8 @@
 },
 {
 "name": "Eval",
-"description": "",
-"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
+"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
+"x-displayName": "Evaluations"
 },
 {
 "name": "PostTraining (Coming Soon)",
@@ -4119,9 +4119,9 @@ tags:
 - name: Datasets
 description: ''
 - name: Eval
-description: ''
-x-displayName: >-
+description: >-
 Llama Stack Evaluation API for running evaluations on model and agent candidates.
+x-displayName: Evaluations
 - name: PostTraining (Coming Soon)
 description: ''
 x-tagGroups:
docs/static/llama-stack-spec.html (vendored, 42 changes)
@@ -282,7 +282,7 @@
 "Conversations"
 ],
 "summary": "Create a conversation.",
-"description": "Create a conversation.",
+"description": "Create a conversation.\nCreate a conversation.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -326,8 +326,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Get a conversation with the given ID.",
-"description": "Get a conversation with the given ID.",
+"summary": "Retrieve a conversation.",
+"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -369,8 +369,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Update a conversation's metadata with the given ID.",
-"description": "Update a conversation's metadata with the given ID.",
+"summary": "Update a conversation.",
+"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -422,8 +422,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Delete a conversation with the given ID.",
-"description": "Delete a conversation with the given ID.",
+"summary": "Delete a conversation.",
+"description": "Delete a conversation.\nDelete a conversation with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -467,8 +467,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "List items in the conversation.",
-"description": "List items in the conversation.",
+"summary": "List items.",
+"description": "List items.\nList items in the conversation.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -597,8 +597,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Create items in the conversation.",
-"description": "Create items in the conversation.",
+"summary": "Create items.",
+"description": "Create items.\nCreate items in the conversation.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -652,8 +652,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Retrieve a conversation item.",
-"description": "Retrieve a conversation item.",
+"summary": "Retrieve an item.",
+"description": "Retrieve an item.\nRetrieve a conversation item.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -704,8 +704,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Delete a conversation item.",
-"description": "Delete a conversation item.",
+"summary": "Delete an item.",
+"description": "Delete an item.\nDelete a conversation item.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -7600,6 +7600,10 @@
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
 },
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
+},
 "input": {
 "type": "array",
 "items": {
@@ -8148,6 +8152,10 @@
 "usage": {
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
+},
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
 }
 },
 "additionalProperties": false,
@@ -13251,8 +13259,8 @@
 },
 {
 "name": "Conversations",
-"description": "",
-"x-displayName": "Protocol for conversation management operations."
+"description": "Protocol for conversation management operations.",
+"x-displayName": "Conversations"
 },
 {
 "name": "Files",
docs/static/llama-stack-spec.yaml (vendored, 64 changes)
@@ -192,7 +192,10 @@ paths:
 tags:
 - Conversations
 summary: Create a conversation.
-description: Create a conversation.
+description: >-
+Create a conversation.
+
+Create a conversation.
 parameters: []
 requestBody:
 content:
@@ -222,8 +225,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Get a conversation with the given ID.
-description: Get a conversation with the given ID.
+summary: Retrieve a conversation.
+description: >-
+Retrieve a conversation.
+
+Get a conversation with the given ID.
 parameters:
 - name: conversation_id
 in: path
@@ -252,9 +258,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: >-
-Update a conversation's metadata with the given ID.
+summary: Update a conversation.
 description: >-
+Update a conversation.
+
 Update a conversation's metadata with the given ID.
 parameters:
 - name: conversation_id
@@ -290,8 +297,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Delete a conversation with the given ID.
-description: Delete a conversation with the given ID.
+summary: Delete a conversation.
+description: >-
+Delete a conversation.
+
+Delete a conversation with the given ID.
 parameters:
 - name: conversation_id
 in: path
@@ -321,8 +331,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: List items in the conversation.
-description: List items in the conversation.
+summary: List items.
+description: >-
+List items.
+
+List items in the conversation.
 parameters:
 - name: conversation_id
 in: path
@@ -495,8 +508,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Create items in the conversation.
-description: Create items in the conversation.
+summary: Create items.
+description: >-
+Create items.
+
+Create items in the conversation.
 parameters:
 - name: conversation_id
 in: path
@@ -532,8 +548,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Retrieve a conversation item.
-description: Retrieve a conversation item.
+summary: Retrieve an item.
+description: >-
+Retrieve an item.
+
+Retrieve a conversation item.
 parameters:
 - name: conversation_id
 in: path
@@ -568,8 +587,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Delete a conversation item.
-description: Delete a conversation item.
+summary: Delete an item.
+description: >-
+Delete an item.
+
+Delete a conversation item.
 parameters:
 - name: conversation_id
 in: path
@@ -5793,6 +5815,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 input:
 type: array
 items:
@@ -6196,6 +6222,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 additionalProperties: false
 required:
 - created_at
@@ -10146,9 +10176,9 @@ tags:
 - `background`
 x-displayName: Agents
 - name: Conversations
-description: ''
-x-displayName: >-
+description: >-
 Protocol for conversation management operations.
+x-displayName: Conversations
 - name: Files
 description: >-
 This API is used to upload documents that can be used with other Llama Stack
docs/static/stainless-llama-stack-spec.html (vendored, 46 changes)
@@ -282,7 +282,7 @@
 "Conversations"
 ],
 "summary": "Create a conversation.",
-"description": "Create a conversation.",
+"description": "Create a conversation.\nCreate a conversation.",
 "parameters": [],
 "requestBody": {
 "content": {
@@ -326,8 +326,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Get a conversation with the given ID.",
-"description": "Get a conversation with the given ID.",
+"summary": "Retrieve a conversation.",
+"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -369,8 +369,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Update a conversation's metadata with the given ID.",
-"description": "Update a conversation's metadata with the given ID.",
+"summary": "Update a conversation.",
+"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -422,8 +422,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Delete a conversation with the given ID.",
-"description": "Delete a conversation with the given ID.",
+"summary": "Delete a conversation.",
+"description": "Delete a conversation.\nDelete a conversation with the given ID.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -467,8 +467,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "List items in the conversation.",
-"description": "List items in the conversation.",
+"summary": "List items.",
+"description": "List items.\nList items in the conversation.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -597,8 +597,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Create items in the conversation.",
-"description": "Create items in the conversation.",
+"summary": "Create items.",
+"description": "Create items.\nCreate items in the conversation.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -652,8 +652,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Retrieve a conversation item.",
-"description": "Retrieve a conversation item.",
+"summary": "Retrieve an item.",
+"description": "Retrieve an item.\nRetrieve a conversation item.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -704,8 +704,8 @@
 "tags": [
 "Conversations"
 ],
-"summary": "Delete a conversation item.",
-"description": "Delete a conversation item.",
+"summary": "Delete an item.",
+"description": "Delete an item.\nDelete a conversation item.",
 "parameters": [
 {
 "name": "conversation_id",
@@ -9272,6 +9272,10 @@
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
 },
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
+},
 "input": {
 "type": "array",
 "items": {
@@ -9820,6 +9824,10 @@
 "usage": {
 "$ref": "#/components/schemas/OpenAIResponseUsage",
 "description": "(Optional) Token usage information for the response"
+},
+"instructions": {
+"type": "string",
+"description": "(Optional) System message inserted into the model's context"
 }
 },
 "additionalProperties": false,
@@ -17928,8 +17936,8 @@
 },
 {
 "name": "Conversations",
-"description": "",
-"x-displayName": "Protocol for conversation management operations."
+"description": "Protocol for conversation management operations.",
+"x-displayName": "Conversations"
 },
 {
 "name": "DatasetIO",
@@ -17941,8 +17949,8 @@
 },
 {
 "name": "Eval",
-"description": "",
-"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
+"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
+"x-displayName": "Evaluations"
 },
 {
 "name": "Files",
docs/static/stainless-llama-stack-spec.yaml (vendored, 68 changes)
@@ -195,7 +195,10 @@ paths:
 tags:
 - Conversations
 summary: Create a conversation.
-description: Create a conversation.
+description: >-
+Create a conversation.
+
+Create a conversation.
 parameters: []
 requestBody:
 content:
@@ -225,8 +228,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Get a conversation with the given ID.
-description: Get a conversation with the given ID.
+summary: Retrieve a conversation.
+description: >-
+Retrieve a conversation.
+
+Get a conversation with the given ID.
 parameters:
 - name: conversation_id
 in: path
@@ -255,9 +261,10 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: >-
-Update a conversation's metadata with the given ID.
+summary: Update a conversation.
 description: >-
+Update a conversation.
+
 Update a conversation's metadata with the given ID.
 parameters:
 - name: conversation_id
@@ -293,8 +300,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Delete a conversation with the given ID.
-description: Delete a conversation with the given ID.
+summary: Delete a conversation.
+description: >-
+Delete a conversation.
+
+Delete a conversation with the given ID.
 parameters:
 - name: conversation_id
 in: path
@@ -324,8 +334,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: List items in the conversation.
-description: List items in the conversation.
+summary: List items.
+description: >-
+List items.
+
+List items in the conversation.
 parameters:
 - name: conversation_id
 in: path
@@ -498,8 +511,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Create items in the conversation.
-description: Create items in the conversation.
+summary: Create items.
+description: >-
+Create items.
+
+Create items in the conversation.
 parameters:
 - name: conversation_id
 in: path
@@ -535,8 +551,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Retrieve a conversation item.
-description: Retrieve a conversation item.
+summary: Retrieve an item.
+description: >-
+Retrieve an item.
+
+Retrieve a conversation item.
 parameters:
 - name: conversation_id
 in: path
@@ -571,8 +590,11 @@ paths:
 $ref: '#/components/responses/DefaultError'
 tags:
 - Conversations
-summary: Delete a conversation item.
-description: Delete a conversation item.
+summary: Delete an item.
+description: >-
+Delete an item.
+
+Delete a conversation item.
 parameters:
 - name: conversation_id
 in: path
@@ -7006,6 +7028,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 input:
 type: array
 items:
@@ -7409,6 +7435,10 @@ components:
 $ref: '#/components/schemas/OpenAIResponseUsage'
 description: >-
 (Optional) Token usage information for the response
+instructions:
+type: string
+description: >-
+(Optional) System message inserted into the model's context
 additionalProperties: false
 required:
 - created_at
@@ -13533,17 +13563,17 @@ tags:
 - name: Benchmarks
 description: ''
 - name: Conversations
-description: ''
-x-displayName: >-
+description: >-
 Protocol for conversation management operations.
+x-displayName: Conversations
 - name: DatasetIO
 description: ''
 - name: Datasets
 description: ''
 - name: Eval
-description: ''
-x-displayName: >-
+description: >-
 Llama Stack Evaluation API for running evaluations on model and agent candidates.
+x-displayName: Evaluations
 - name: Files
 description: >-
 This API is used to upload documents that can be used with other Llama Stack
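
The spec changes above track a docstring convention used in the API protocol files later in this diff: the first line of a class or method docstring becomes the OpenAPI summary (and a tag's `x-displayName`), and the remaining lines become the description. A small sketch of the pattern, modeled on the Conversations protocol changes below (the spec generator itself is not part of this diff, so treat the mapping as a summary rather than a spec):

```python
from typing import Protocol


class Conversations(Protocol):
    # First docstring line -> OpenAPI summary; remaining lines -> description.
    async def get_conversation(self, conversation_id: str) -> "Conversation":
        """Retrieve a conversation.

        Get a conversation with the given ID.
        """
        ...
```
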
@@ -78,17 +78,14 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next

 ## Build, Configure, and Run Llama Stack

-1. **Build the Llama Stack**:
-Build the Llama Stack using the `starter` template:
+1. **Install dependencies**:
 ```bash
-uv run --with llama-stack llama stack build --distro starter --image-type venv
+llama stack list-deps starter | xargs -L1 uv pip install
 ```
-**Expected Output:**
+2. **Start the distribution**:
 ```bash
-...
-Build Successful!
-You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
-You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
+llama stack run starter
 ```

 3. **Set the ENV variables by exporting them to the terminal**:
@@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
     :param tools: (Optional) An array of tools the model may call while generating a response.
     :param truncation: (Optional) Truncation strategy applied to the response
     :param usage: (Optional) Token usage information for the response
+    :param instructions: (Optional) System message inserted into the model's context
     """

     created_at: int
@@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
     tools: list[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
+    instructions: str | None = None


 @json_schema_type
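
For illustration, the new optional field as it might surface on a response payload (sibling keys and values here are made up; the field simply carries the system message that was inserted into the model's context):

```python
# Hypothetical response payload; only `instructions` is the point of interest.
response = {
    "id": "resp_123",
    "created_at": 1730000000,
    "model": "example-model",
    "status": "completed",
    "instructions": "You are a concise assistant.",  # new optional field
}
print(response.get("instructions"))
```
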
@@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Conversations(Protocol):
-    """Protocol for conversation management operations."""
+    """Conversations
+
+    Protocol for conversation management operations."""

     @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
     async def create_conversation(
@@ -181,6 +183,8 @@ class Conversations(Protocol):
     ) -> Conversation:
         """Create a conversation.

+        Create a conversation.
+
         :param items: Initial items to include in the conversation context.
         :param metadata: Set of key-value pairs that can be attached to an object.
         :returns: The created conversation object.
@@ -189,7 +193,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_conversation(self, conversation_id: str) -> Conversation:
-        """Get a conversation with the given ID.
+        """Retrieve a conversation.
+
+        Get a conversation with the given ID.

         :param conversation_id: The conversation identifier.
         :returns: The conversation object.
@@ -198,7 +204,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
-        """Update a conversation's metadata with the given ID.
+        """Update a conversation.
+
+        Update a conversation's metadata with the given ID.

         :param conversation_id: The conversation identifier.
         :param metadata: Set of key-value pairs that can be attached to an object.
@@ -208,7 +216,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
-        """Delete a conversation with the given ID.
+        """Delete a conversation.
+
+        Delete a conversation with the given ID.

         :param conversation_id: The conversation identifier.
         :returns: The deleted conversation resource.
@@ -217,7 +227,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
     async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
-        """Create items in the conversation.
+        """Create items.
+
+        Create items in the conversation.

         :param conversation_id: The conversation identifier.
         :param items: Items to include in the conversation context.
@@ -227,7 +239,9 @@ class Conversations(Protocol):

     @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
-        """Retrieve a conversation item.
+        """Retrieve an item.
+
+        Retrieve a conversation item.

         :param conversation_id: The conversation identifier.
         :param item_id: The item identifier.
@@ -244,7 +258,9 @@ class Conversations(Protocol):
         limit: int | NotGiven = NOT_GIVEN,
         order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
     ) -> ConversationItemList:
-        """List items in the conversation.
+        """List items.
+
+        List items in the conversation.

         :param conversation_id: The conversation identifier.
         :param after: An item ID to list items after, used in pagination.
@@ -259,7 +275,9 @@ class Conversations(Protocol):
     async def openai_delete_conversation_item(
         self, conversation_id: str, item_id: str
     ) -> ConversationItemDeletedResource:
-        """Delete a conversation item.
+        """Delete an item.
+
+        Delete a conversation item.

         :param conversation_id: The conversation identifier.
         :param item_id: The item identifier.
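
For reference, a rough sketch of the same operations over HTTP once a server is running (the `/v1` prefix follows from `LLAMA_STACK_API_V1`; the host, port, and payloads shown here are illustrative):

```bash
# Create a conversation
curl -X POST http://localhost:8321/v1/conversations \
  -H "Content-Type: application/json" \
  -d '{"metadata": {"topic": "demo"}}'

# Retrieve, update, and delete a conversation
curl http://localhost:8321/v1/conversations/<conversation_id>
curl -X POST http://localhost:8321/v1/conversations/<conversation_id> \
  -H "Content-Type: application/json" -d '{"metadata": {"topic": "updated"}}'
curl -X DELETE http://localhost:8321/v1/conversations/<conversation_id>

# List items in a conversation
curl http://localhost:8321/v1/conversations/<conversation_id>/items
```
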
@@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):


 class Eval(Protocol):
-    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
+    """Evaluations
+
+    Llama Stack Evaluation API for running evaluations on model and agent candidates."""

     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
@@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.external import load_external_apis
 from llama_stack.core.resolver import InvalidProviderError
 from llama_stack.core.stack import replace_env_vars
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageConfig,
+)
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

 DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"

@@ -286,21 +294,42 @@ def _generate_run_config(
     Generate a run.yaml template file for user to edit from a build.yaml file
     """
     apis = list(build_config.distribution_spec.providers.keys())
+    distro_dir = DISTRIBS_BASE_DIR / image_name
+    storage = StorageConfig(
+        backends={
+            "kv_default": SqliteKVStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
+            ),
+            "sql_default": SqliteSqlStoreConfig(
+                db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
+            ),
+        },
+        stores=ServerStoresConfig(
+            metadata=KVStoreReference(
+                backend="kv_default",
+                namespace="registry",
+            ),
+            inference=InferenceStoreReference(
+                backend="sql_default",
+                table_name="inference_store",
+            ),
+            conversations=SqlStoreReference(
+                backend="sql_default",
+                table_name="openai_conversations",
+            ),
+        ),
+    )

     run_config = StackRunConfig(
         container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
         image_name=image_name,
         apis=apis,
         providers={},
+        storage=storage,
         external_providers_dir=build_config.external_providers_dir
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,
     )
-    if not run_config.inference_store:
-        run_config.inference_store = SqliteSqlStoreConfig(
-            **SqliteSqlStoreConfig.sample_run_config(
-                __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
-            )
-        )
     # build providers dict
     provider_registry = get_provider_registry(build_config)
     for api in apis:
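
Serialized into the generated run.yaml, the `StorageConfig` built above corresponds to a storage section roughly like the following (a sketch; the `type` discriminators and the expanded default path are assumptions based on the datatypes referenced here, with `<image_name>` standing in for the distribution directory):

```yaml
storage:
  backends:
    kv_default:
      type: sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/<image_name>}/kvstore.db
    sql_default:
      type: sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/<image_name>}/sql_store.db
  stores:
    metadata:
      backend: kv_default
      namespace: registry
    inference:
      backend: sql_default
      table_name: inference_store
    conversations:
      backend: sql_default
      table_name: openai_conversations
```
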
llama_stack/cli/stack/_list_deps.py (new file, 182 lines)
@@ -0,0 +1,182 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
import sys
from pathlib import Path

import yaml
from termcolor import cprint

from llama_stack.cli.stack.utils import ImageType
from llama_stack.core.build import get_provider_dependencies
from llama_stack.core.datatypes import (
    BuildConfig,
    BuildProvider,
    DistributionSpec,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import replace_env_vars
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api

TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"

logger = get_logger(name=__name__, category="cli")


# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.
SERVER_DEPENDENCIES = [
    "aiosqlite",
    "fastapi",
    "fire",
    "httpx",
    "uvicorn",
    "opentelemetry-sdk",
    "opentelemetry-exporter-otlp-proto-http",
]


def format_output_deps_only(
    normal_deps: list[str],
    special_deps: list[str],
    external_deps: list[str],
    uv: bool = False,
) -> str:
    """Format dependencies as a list."""
    lines = []

    uv_str = ""
    if uv:
        uv_str = "uv pip install "

    # Quote deps with commas
    quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
    lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")

    for special_dep in special_deps:
        lines.append(f"{uv_str}{quote_special_dep(special_dep)}")

    for external_dep in external_deps:
        lines.append(f"{uv_str}{quote_special_dep(external_dep)}")

    return "\n".join(lines)


def run_stack_list_deps_command(args: argparse.Namespace) -> None:
    if args.config:
        try:
            from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro

            config_file = resolve_config_or_distro(args.config, Mode.BUILD)
        except ValueError as e:
            cprint(
                f"Could not parse config file {args.config}: {e}",
                color="red",
                file=sys.stderr,
            )
            sys.exit(1)
        if config_file:
            with open(config_file) as f:
                try:
                    contents = yaml.safe_load(f)
                    contents = replace_env_vars(contents)
                    build_config = BuildConfig(**contents)
                    build_config.image_type = "venv"
                except Exception as e:
                    cprint(
                        f"Could not parse config file {config_file}: {e}",
                        color="red",
                        file=sys.stderr,
                    )
                    sys.exit(1)
    elif args.providers:
        provider_list: dict[str, list[BuildProvider]] = dict()
        for api_provider in args.providers.split(","):
            if "=" not in api_provider:
                cprint(
                    "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
            api, provider_type = api_provider.split("=")
            providers_for_api = get_provider_registry().get(Api(api), None)
            if providers_for_api is None:
                cprint(
                    f"{api} is not a valid API.",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
            if provider_type in providers_for_api:
                provider = BuildProvider(
                    provider_type=provider_type,
                    module=None,
                )
                provider_list.setdefault(api, []).append(provider)
            else:
                cprint(
                    f"{provider_type} is not a valid provider for the {api} API.",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
        distribution_spec = DistributionSpec(
            providers=provider_list,
            description=",".join(args.providers),
        )
        build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)

    normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
    normal_deps += SERVER_DEPENDENCIES

    # Add external API dependencies
    if build_config.external_apis_dir:
        from llama_stack.core.external import load_external_apis

        external_apis = load_external_apis(build_config)
        if external_apis:
            for _, api_spec in external_apis.items():
                normal_deps.extend(api_spec.pip_packages)

    # Format and output based on requested format
    output = format_output_deps_only(
        normal_deps=normal_deps,
        special_deps=special_deps,
        external_deps=external_provider_dependencies,
        uv=args.format == "uv",
    )

    print(output)


def quote_if_needed(dep):
    # Add quotes if the dependency contains special characters that need escaping in shell
    # This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
    needs_quoting = any(char in dep for char in [",", "<", ">", "="])
    return f"'{dep}'" if needs_quoting else dep


def quote_special_dep(dep_string):
    """
    Quote individual packages in a special dependency string.
    Special deps may contain multiple packages and flags like --extra-index-url.
    We need to quote only the package specs that contain special characters.
    """
    parts = dep_string.split()
    quoted_parts = []

    for part in parts:
        # Don't quote flags (they start with -)
        if part.startswith("-"):
            quoted_parts.append(part)
        else:
            # Quote package specs that need it
            quoted_parts.append(quote_if_needed(part))

    return " ".join(quoted_parts)
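To make the quoting rules concrete, here is a hedged, illustrative check of the helpers defined above; the package specs are made up and only show which strings get shell-quoted:

# Illustrative only: mirrors the helpers defined in _list_deps.py above.
from llama_stack.cli.stack._list_deps import quote_if_needed, quote_special_dep

print(quote_if_needed("fastapi"))        # fastapi           (no special chars, left bare)
print(quote_if_needed("torch>=2.1,<3"))  # 'torch>=2.1,<3'   (comparison/comma -> quoted)
print(quote_special_dep("--extra-index-url https://example.org/simple torch>=2.1"))
# --extra-index-url https://example.org/simple 'torch>=2.1'  (flags and plain URLs left bare)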
@@ -8,6 +8,9 @@ import textwrap
 
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
+from llama_stack.log import get_logger
+
+logger = get_logger(__name__, category="cli")
 
 
 class StackBuild(Subcommand):
@@ -16,7 +19,7 @@ class StackBuild(Subcommand):
         self.parser = subparsers.add_parser(
             "build",
             prog="llama stack build",
-            description="Build a Llama stack container",
+            description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         )
         self._add_arguments()
@@ -93,6 +96,9 @@ the build. If not specified, currently active environment will be used if found.
         )
 
     def _run_stack_build_command(self, args: argparse.Namespace) -> None:
+        logger.warning(
+            "The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
+        )
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
         from ._build import run_stack_build_command
llama_stack/cli/stack/list_deps.py (new file, 51 lines)
@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse

from llama_stack.cli.subcommand import Subcommand


class StackListDeps(Subcommand):
    def __init__(self, subparsers: argparse._SubParsersAction):
        super().__init__()
        self.parser = subparsers.add_parser(
            "list-deps",
            prog="llama stack list-deps",
            description="list the dependencies for a llama stack distribution",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        self._add_arguments()
        self.parser.set_defaults(func=self._run_stack_list_deps_command)

    def _add_arguments(self):
        self.parser.add_argument(
            "config",
            type=str,
            nargs="?",  # Make it optional
            metavar="config | distro",
            help="Path to config file to use or name of known distro (llama stack list for a list).",
        )

        self.parser.add_argument(
            "--providers",
            type=str,
            default=None,
            help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
        )
        self.parser.add_argument(
            "--format",
            type=str,
            choices=["uv", "deps-only"],
            default="deps-only",
            help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
        )

    def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
        # always keep implementation completely silo-ed away from CLI so CLI
        # can be fast to load and reduces dependencies
        from ._list_deps import run_stack_list_deps_command

        return run_stack_list_deps_command(args)
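As a hedged illustration of the --providers value this subcommand accepts: a comma-separated list of api=provider_type pairs, split the same way _list_deps.py does it. The provider types below are purely illustrative:

# Illustrative only: how a --providers string decomposes into api/provider pairs.
providers_arg = "inference=remote::ollama,safety=inline::llama-guard"  # example value
provider_list: dict[str, list[str]] = {}
for api_provider in providers_arg.split(","):
    api, provider_type = api_provider.split("=")
    provider_list.setdefault(api, []).append(provider_type)
print(provider_list)
# {'inference': ['remote::ollama'], 'safety': ['inline::llama-guard']}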
@@ -13,6 +13,7 @@ from llama_stack.cli.subcommand import Subcommand
 
 from .build import StackBuild
 from .list_apis import StackListApis
+from .list_deps import StackListDeps
 from .list_providers import StackListProviders
 from .remove import StackRemove
 from .run import StackRun
@@ -39,6 +40,7 @@ class StackParser(Subcommand):
         subparsers = self.parser.add_subparsers(title="stack_subcommands")
 
         # Add sub-commands
+        StackListDeps.create(subparsers)
         StackBuild.create(subparsers)
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
@@ -4,7 +4,37 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+import json
+import sys
 from enum import Enum
+from functools import lru_cache
+from pathlib import Path
+
+import yaml
+from termcolor import cprint
+
+from llama_stack.core.datatypes import (
+    BuildConfig,
+    Provider,
+    StackRunConfig,
+    StorageConfig,
+)
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.resolver import InvalidProviderError
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+)
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.image_types import LlamaStackImageType
+from llama_stack.providers.datatypes import Api
+
+TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
+
 
 class ImageType(Enum):
@@ -19,3 +49,103 @@ def print_subcommand_description(parser, subparsers):
         description = subcommand.description
         description_text += f" {name:<21} {description}\n"
     parser.epilog = description_text
+
+
+def generate_run_config(
+    build_config: BuildConfig,
+    build_dir: Path,
+    image_name: str,
+) -> Path:
+    """
+    Generate a run.yaml template file for user to edit from a build.yaml file
+    """
+    apis = list(build_config.distribution_spec.providers.keys())
+    distro_dir = DISTRIBS_BASE_DIR / image_name
+    run_config = StackRunConfig(
+        container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
+        image_name=image_name,
+        apis=apis,
+        providers={},
+        storage=StorageConfig(
+            backends={
+                "kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
+                "sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
+            },
+            stores=ServerStoresConfig(
+                metadata=KVStoreReference(backend="kv_default", namespace="registry"),
+                inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
+                conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+            ),
+        ),
+        external_providers_dir=build_config.external_providers_dir
+        if build_config.external_providers_dir
+        else EXTERNAL_PROVIDERS_DIR,
+    )
+    # build providers dict
+    provider_registry = get_provider_registry(build_config)
+    for api in apis:
+        run_config.providers[api] = []
+        providers = build_config.distribution_spec.providers[api]
+
+        for provider in providers:
+            pid = provider.provider_type.split("::")[-1]
+
+            p = provider_registry[Api(api)][provider.provider_type]
+            if p.deprecation_error:
+                raise InvalidProviderError(p.deprecation_error)
+
+            try:
+                config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
+            except (ModuleNotFoundError, ValueError) as exc:
+                # HACK ALERT:
+                # This code executes after building is done, the import cannot work since the
+                # package is either available in the venv or container - not available on the host.
+                # TODO: use a "is_external" flag in ProviderSpec to check if the provider is
+                # external
+                cprint(
+                    f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
+                    color="yellow",
+                    file=sys.stderr,
+                )
+                # Set config_type to None to avoid UnboundLocalError
+                config_type = None
+
+            if config_type is not None and hasattr(config_type, "sample_run_config"):
+                config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
+            else:
+                config = {}
+
+            p_spec = Provider(
+                provider_id=pid,
+                provider_type=provider.provider_type,
+                config=config,
+                module=provider.module,
+            )
+            run_config.providers[api].append(p_spec)
+
+    run_config_file = build_dir / f"{image_name}-run.yaml"
+
+    with open(run_config_file, "w") as f:
+        to_write = json.loads(run_config.model_dump_json())
+        f.write(yaml.dump(to_write, sort_keys=False))
+
+    # Only print this message for non-container builds since it will be displayed before the
+    # container is built
+    # For non-container builds, the run.yaml is generated at the very end of the build process so it
+    # makes sense to display this message
+    if build_config.image_type != LlamaStackImageType.CONTAINER.value:
+        cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
+    return run_config_file
+
+
+@lru_cache
+def available_templates_specs() -> dict[str, BuildConfig]:
+    import yaml
+
+    template_specs = {}
+    for p in TEMPLATES_PATH.rglob("*build.yaml"):
+        template_name = p.parent.name
+        with open(p) as f:
+            build_config = BuildConfig(**yaml.safe_load(f))
+            template_specs[template_name] = build_config
+    return template_specs
@@ -159,6 +159,37 @@ def upgrade_from_routing_table(
         config_dict["apis"] = config_dict["apis_to_serve"]
         config_dict.pop("apis_to_serve", None)
 
+    # Add default storage config if not present
+    if "storage" not in config_dict:
+        config_dict["storage"] = {
+            "backends": {
+                "kv_default": {
+                    "type": "kv_sqlite",
+                    "db_path": "~/.llama/kvstore.db",
+                },
+                "sql_default": {
+                    "type": "sql_sqlite",
+                    "db_path": "~/.llama/sql_store.db",
+                },
+            },
+            "stores": {
+                "metadata": {
+                    "namespace": "registry",
+                    "backend": "kv_default",
+                },
+                "inference": {
+                    "table_name": "inference_store",
+                    "backend": "sql_default",
+                    "max_write_queue_size": 10000,
+                    "num_writers": 4,
+                },
+                "conversations": {
+                    "table_name": "openai_conversations",
+                    "backend": "sql_default",
+                },
+            },
+        }
+
     return config_dict
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import os
 import secrets
 import time
 from typing import Any
@@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
     Conversations,
     Metadata,
 )
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.core.datatypes import AccessRule, StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
 from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import (
-    SqliteSqlStoreConfig,
-    SqlStoreConfig,
-    sqlstore_impl,
-)
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 
 logger = get_logger(name=__name__, category="openai_conversations")
 
@@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
 class ConversationServiceConfig(BaseModel):
     """Configuration for the built-in conversation service.
 
-    :param conversations_store: SQL store configuration for conversations (defaults to SQLite)
+    :param run_config: Stack run configuration for resolving persistence
     :param policy: Access control rules
     """
 
-    conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
-        db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
-    )
+    run_config: StackRunConfig
     policy: list[AccessRule] = []
 
 
@@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
         self.deps = deps
         self.policy = config.policy
 
-        base_sql_store = sqlstore_impl(config.conversations_store)
+        # Use conversations store reference from run config
+        conversations_ref = config.run_config.storage.stores.conversations
+        if not conversations_ref:
+            raise ValueError("storage.stores.conversations must be configured in run config")
+
+        base_sql_store = sqlstore_impl(conversations_ref)
         self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
 
     async def initialize(self) -> None:
         """Initialize the store and create tables."""
-        if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
-            os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
-
         await self.sql_store.create_table(
             "openai_conversations",
             {
@@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import (
+    KVStoreReference,
+    StorageBackendType,
+    StorageConfig,
+)
 from llama_stack.providers.datatypes import Api, ProviderSpec
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@@ -382,7 +385,7 @@ class QuotaPeriod(StrEnum):
 
 
 class QuotaConfig(BaseModel):
-    kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
+    kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
     anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
     authenticated_max_requests: int = Field(
         default=1000, description="Max requests for authenticated clients per period"
@@ -464,18 +467,6 @@ class ServerConfig(BaseModel):
     )
 
 
-class InferenceStoreConfig(BaseModel):
-    sql_store_config: SqlStoreConfig
-    max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
-    num_writers: int = Field(default=4, description="Number of concurrent background writers")
-
-
-class ResponsesStoreConfig(BaseModel):
-    sql_store_config: SqlStoreConfig
-    max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
-    num_writers: int = Field(default=4, description="Number of concurrent background writers")
-
-
 class StackRunConfig(BaseModel):
     version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
@@ -502,26 +493,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
 can be instantiated multiple times (with different configs) if necessary.
 """,
     )
-    metadata_store: KVStoreConfig | None = Field(
-        default=None,
-        description="""
-Configuration for the persistence store used by the distribution registry. If not specified,
-a default SQLite store will be used.""",
-    )
-
-    inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
-        default=None,
-        description="""
-Configuration for the persistence store used by the inference API. Can be either a
-InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
-If not specified, a default SQLite store will be used.""",
-    )
-
-    conversations_store: SqlStoreConfig | None = Field(
-        default=None,
-        description="""
-Configuration for the persistence store used by the conversations API.
-If not specified, a default SQLite store will be used.""",
+    storage: StorageConfig = Field(
+        description="Catalog of named storage backends and references available to the stack",
     )
 
     # registry of "resources" in the distribution
@@ -566,6 +539,49 @@ If not specified, a default SQLite store will be used.""",
             return Path(v)
         return v
 
+    @model_validator(mode="after")
+    def validate_server_stores(self) -> "StackRunConfig":
+        backend_map = self.storage.backends
+        stores = self.storage.stores
+        kv_backends = {
+            name
+            for name, cfg in backend_map.items()
+            if cfg.type
+            in {
+                StorageBackendType.KV_REDIS,
+                StorageBackendType.KV_SQLITE,
+                StorageBackendType.KV_POSTGRES,
+                StorageBackendType.KV_MONGODB,
+            }
+        }
+        sql_backends = {
+            name
+            for name, cfg in backend_map.items()
+            if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
+        }
+
+        def _ensure_backend(reference, expected_set, store_name: str) -> None:
+            if reference is None:
+                return
+            backend_name = reference.backend
+            if backend_name not in backend_map:
+                raise ValueError(
+                    f"{store_name} references unknown backend '{backend_name}'. "
+                    f"Available backends: {sorted(backend_map)}"
+                )
+            if backend_name not in expected_set:
+                raise ValueError(
+                    f"{store_name} references backend '{backend_name}' of type "
+                    f"'{backend_map[backend_name].type.value}', but a backend of type "
+                    f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
+                )
+
+        _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
+        _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
+        _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
+        _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
+        return self
+
 
 class BuildConfig(BaseModel):
     version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
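A hedged sketch of what the new validate_server_stores check catches: a store reference wired to a backend of the wrong kind is rejected when the run config is validated, not at first use. Backend names and paths below are illustrative:

# Illustrative only: a SQL-type store reference pointed at a kv_* backend.
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

storage = StorageConfig(
    backends={"kv_default": SqliteKVStoreConfig(db_path="/tmp/kvstore.db")},
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        # points a SQL store at a kv_* backend; a StackRunConfig built with this
        # storage block would be expected to raise ValueError in validate_server_stores
        conversations=SqlStoreReference(backend="kv_default", table_name="openai_conversations"),
    ),
)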
@@ -11,9 +11,8 @@ from pydantic import BaseModel
 
 from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
 from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 
 
 class PromptServiceConfig(BaseModel):
@@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
         self.kvstore: KVStore
 
     async def initialize(self) -> None:
-        kvstore_config = SqliteKVStoreConfig(
-            db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix()
-        )
-        self.kvstore = await kvstore_impl(kvstore_config)
+        # Use metadata store backend with prompts-specific namespace
+        metadata_ref = self.config.run_config.storage.stores.metadata
+        if not metadata_ref:
+            raise ValueError("storage.stores.metadata must be configured in run config")
+        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
+        self.kvstore = await kvstore_impl(prompts_ref)
 
     def _get_default_key(self, prompt_id: str) -> str:
         """Get the KVStore key that stores the default version number."""
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import importlib
+import importlib.metadata
 import inspect
 from typing import Any
@@ -6,7 +6,10 @@
 
 from typing import Any
 
-from llama_stack.core.datatypes import AccessRule, RoutedProtocol
+from llama_stack.core.datatypes import (
+    AccessRule,
+    RoutedProtocol,
+)
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable
@@ -78,9 +81,13 @@ async def get_auto_router_impl(
             api_to_dep_impl[dep_name] = deps[dep_api]
 
     # TODO: move pass configs to routers instead
-    if api == Api.inference and run_config.inference_store:
+    if api == Api.inference:
+        inference_ref = run_config.storage.stores.inference
+        if not inference_ref:
+            raise ValueError("storage.stores.inference must be configured in run config")
+
         inference_store = InferenceStore(
-            config=run_config.inference_store,
+            reference=inference_ref,
             policy=policy,
         )
         await inference_store.initialize()
@@ -72,13 +72,30 @@ class AuthProvider(ABC):
 def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
     attributes: dict[str, list[str]] = {}
     for claim_key, attribute_key in mapping.items():
-        if claim_key not in claims:
-            continue
+        # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
+        # Then fall back to literal key with dots (e.g., "my.dotted.key")
+        claim: object = claims
+        keys = claim_key.split(".")
+        for key in keys:
+            if isinstance(claim, dict) and key in claim:
+                claim = claim[key]
+            else:
+                claim = None
+                break
+
+        if claim is None and claim_key in claims:
+            # Fall back to checking if claim_key exists as a literal key
             claim = claims[claim_key]
+
+        if claim is None:
+            continue
+
         if isinstance(claim, list):
             values = claim
-        else:
+        elif isinstance(claim, str):
             values = claim.split()
+        else:
+            continue
 
         if attribute_key in attributes:
             attributes[attribute_key].extend(values)
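A hedged illustration of the new dot-notation claim lookup, using a Keycloak-style nested claim; the claim values are made up:

# Illustrative only: nested claim traversal as implemented above.
claims = {"resource_access": {"llamastack": {"roles": ["admin", "user"]}}, "sub": "user-123"}
mapping = {"resource_access.llamastack.roles": "roles", "sub": "username"}
# get_attributes_from_claims(claims, mapping) would be expected to return:
# {"roles": ["admin", "user"], "username": ["user-123"]}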
@@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta
 
 from starlette.types import ASGIApp, Receive, Scope, Send
 
+from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
 
 logger = get_logger(name=__name__, category="core::server")
 
@@ -33,7 +33,7 @@ class QuotaMiddleware:
     def __init__(
         self,
         app: ASGIApp,
-        kv_config: KVStoreConfig,
+        kv_config: KVStoreReference,
         anonymous_max_requests: int,
         authenticated_max_requests: int,
         window_seconds: int = 86400,
@@ -45,15 +45,15 @@ class QuotaMiddleware:
         self.authenticated_max_requests = authenticated_max_requests
         self.window_seconds = window_seconds
 
-        if isinstance(self.kv_config, SqliteKVStoreConfig):
+    async def _get_kv(self) -> KVStore:
+        if self.kv is None:
+            self.kv = await kvstore_impl(self.kv_config)
+            backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
+            if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
                 logger.warning(
                     "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
                     f"window_seconds={self.window_seconds}"
                 )
-
-    async def _get_kv(self) -> KVStore:
-        if self.kv is None:
-            self.kv = await kvstore_impl(self.kv_config)
         return self.kv
 
     async def __call__(self, scope: Scope, receive: Receive, send: Send):
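Because kv_config is now a KVStoreReference, the quota store is selected by backend name from the storage.backends catalog rather than configured inline. A small illustrative sketch (the namespace is an assumption, not taken from the diff):

# Illustrative only: QuotaMiddleware now receives a reference into storage.backends.
from llama_stack.core.storage.datatypes import KVStoreReference

quota_ref = KVStoreReference(backend="kv_default", namespace="quota")  # assumed namespace
# middleware = QuotaMiddleware(app, kv_config=quota_ref,
#                              anonymous_max_requests=100, authenticated_max_requests=1000)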
@@ -42,6 +42,16 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceI
 from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
 from llama_stack.core.resolver import ProviderRegistry, resolve_impls
 from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageBackendConfig,
+    StorageConfig,
+)
 from llama_stack.core.store.registry import create_dist_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
@@ -357,6 +367,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
     impls[Api.conversations] = conversations_impl
 
 
+def _initialize_storage(run_config: StackRunConfig):
+    kv_backends: dict[str, StorageBackendConfig] = {}
+    sql_backends: dict[str, StorageBackendConfig] = {}
+    for backend_name, backend_config in run_config.storage.backends.items():
+        type = backend_config.type.value
+        if type.startswith("kv_"):
+            kv_backends[backend_name] = backend_config
+        elif type.startswith("sql_"):
+            sql_backends[backend_name] = backend_config
+        else:
+            raise ValueError(f"Unknown storage backend type: {type}")
+
+    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
+    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+
+    register_kvstore_backends(kv_backends)
+    register_sqlstore_backends(sql_backends)
+
+
 class Stack:
     def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
         self.run_config = run_config
@@ -375,7 +404,11 @@ class Stack:
                 TEST_RECORDING_CONTEXT.__enter__()
                 logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
 
-        dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
+        _initialize_storage(self.run_config)
+        stores = self.run_config.storage.stores
+        if not stores.metadata:
+            raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
+        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
         policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
 
         internal_impls = {}
@@ -516,5 +549,16 @@ def run_config_from_adhoc_config_spec(
         image_name="distro-test",
         apis=list(provider_configs_by_api.keys()),
         providers=provider_configs_by_api,
+        storage=StorageConfig(
+            backends={
+                "kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
+                "sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
+            },
+            stores=ServerStoresConfig(
+                metadata=KVStoreReference(backend="kv_default", namespace="registry"),
+                inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
+                conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+            ),
+        ),
     )
     return config
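Read together, the startup path above is: register the named kv_*/sql_* backends once, then resolve each store through its reference. A hedged sketch of that flow, assuming run_config is an already-parsed StackRunConfig; this is not the actual startup code:

# Hedged sketch: resolve the metadata KV store from its named backend
# after the kv_* backends have been registered.
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends

async def open_metadata_store(run_config):
    # register only the kv_* backends from the run config (assumed to be pre-validated)
    register_kvstore_backends(
        {name: cfg for name, cfg in run_config.storage.backends.items() if cfg.type.value.startswith("kv_")}
    )
    metadata_ref = run_config.storage.stores.metadata  # e.g. backend="kv_default", namespace="registry"
    return await kvstore_impl(metadata_ref)            # reference resolved against the registered backends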
llama_stack/core/storage/__init__.py (new file, 5 lines)
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
llama_stack/core/storage/datatypes.py (new file, 283 lines)
@@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal

from pydantic import BaseModel, Field, field_validator


class StorageBackendType(StrEnum):
    KV_REDIS = "kv_redis"
    KV_SQLITE = "kv_sqlite"
    KV_POSTGRES = "kv_postgres"
    KV_MONGODB = "kv_mongodb"
    SQL_SQLITE = "sql_sqlite"
    SQL_POSTGRES = "sql_postgres"


class CommonConfig(BaseModel):
    namespace: str | None = Field(
        default=None,
        description="All keys will be prefixed with this namespace",
    )


class RedisKVStoreConfig(CommonConfig):
    type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
    host: str = "localhost"
    port: int = 6379

    @property
    def url(self) -> str:
        return f"redis://{self.host}:{self.port}"

    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["redis"]

    @classmethod
    def sample_run_config(cls):
        return {
            "type": StorageBackendType.KV_REDIS.value,
            "host": "${env.REDIS_HOST:=localhost}",
            "port": "${env.REDIS_PORT:=6379}",
        }


class SqliteKVStoreConfig(CommonConfig):
    type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
    db_path: str = Field(
        description="File path for the sqlite database",
    )

    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["aiosqlite"]

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
        return {
            "type": StorageBackendType.KV_SQLITE.value,
            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
        }


class PostgresKVStoreConfig(CommonConfig):
    type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
    host: str = "localhost"
    port: int | str = 5432
    db: str = "llamastack"
    user: str
    password: str | None = None
    ssl_mode: str | None = None
    ca_cert_path: str | None = None
    table_name: str = "llamastack_kvstore"

    @classmethod
    def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
        return {
            "type": StorageBackendType.KV_POSTGRES.value,
            "host": "${env.POSTGRES_HOST:=localhost}",
            "port": "${env.POSTGRES_PORT:=5432}",
            "db": "${env.POSTGRES_DB:=llamastack}",
            "user": "${env.POSTGRES_USER:=llamastack}",
            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
        }

    @classmethod
    @field_validator("table_name")
    def validate_table_name(cls, v: str) -> str:
        # PostgreSQL identifiers rules:
        # - Must start with a letter or underscore
        # - Can contain letters, numbers, and underscores
        # - Maximum length is 63 bytes
        pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
        if not re.match(pattern, v):
            raise ValueError(
                "Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
            )
        if len(v) > 63:
            raise ValueError("Table name must be less than 63 characters")
        return v

    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["psycopg2-binary"]


class MongoDBKVStoreConfig(CommonConfig):
    type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
    host: str = "localhost"
    port: int = 27017
    db: str = "llamastack"
    user: str | None = None
    password: str | None = None
    collection_name: str = "llamastack_kvstore"

    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["pymongo"]

    @classmethod
    def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
        return {
            "type": StorageBackendType.KV_MONGODB.value,
            "host": "${env.MONGODB_HOST:=localhost}",
            "port": "${env.MONGODB_PORT:=5432}",
            "db": "${env.MONGODB_DB}",
            "user": "${env.MONGODB_USER}",
            "password": "${env.MONGODB_PASSWORD}",
            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
        }


class SqlAlchemySqlStoreConfig(BaseModel):
    @property
    @abstractmethod
    def engine_str(self) -> str: ...

    # TODO: move this when we have a better way to specify dependencies with internal APIs
    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["sqlalchemy[asyncio]"]


class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
    type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
    db_path: str = Field(
        description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
    )

    @property
    def engine_str(self) -> str:
        return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()

    @classmethod
    def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
        return {
            "type": StorageBackendType.SQL_SQLITE.value,
            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
        }

    @classmethod
    def pip_packages(cls) -> list[str]:
        return super().pip_packages() + ["aiosqlite"]


class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
    type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
    host: str = "localhost"
    port: int | str = 5432
    db: str = "llamastack"
    user: str
    password: str | None = None

    @property
    def engine_str(self) -> str:
        return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"

    @classmethod
    def pip_packages(cls) -> list[str]:
        return super().pip_packages() + ["asyncpg"]

    @classmethod
    def sample_run_config(cls, **kwargs):
        return {
            "type": StorageBackendType.SQL_POSTGRES.value,
            "host": "${env.POSTGRES_HOST:=localhost}",
            "port": "${env.POSTGRES_PORT:=5432}",
            "db": "${env.POSTGRES_DB:=llamastack}",
            "user": "${env.POSTGRES_USER:=llamastack}",
            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
        }


# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
    """A reference to a 'SQL-like' persistent store. A table name must be provided."""

    table_name: str = Field(
        description="Name of the table to use for the SqlStore",
    )

    backend: str = Field(
        description="Name of backend from storage.backends",
    )


# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
    """A reference to a 'key-value' persistent store. A namespace must be provided."""

    namespace: str = Field(
        description="Key prefix for KVStore backends",
    )

    backend: str = Field(
        description="Name of backend from storage.backends",
    )


StorageBackendConfig = Annotated[
    RedisKVStoreConfig
    | SqliteKVStoreConfig
    | PostgresKVStoreConfig
    | MongoDBKVStoreConfig
    | SqliteSqlStoreConfig
    | PostgresSqlStoreConfig,
    Field(discriminator="type"),
]


class InferenceStoreReference(SqlStoreReference):
    """Inference store configuration with queue tuning."""

    max_write_queue_size: int = Field(
        default=10000,
        description="Max queued writes for inference store",
    )
    num_writers: int = Field(
        default=4,
        description="Number of concurrent background writers",
    )


class ResponsesStoreReference(InferenceStoreReference):
    """Responses store configuration with queue tuning."""


class ServerStoresConfig(BaseModel):
    metadata: KVStoreReference | None = Field(
        default=None,
        description="Metadata store configuration (uses KV backend)",
    )
    inference: InferenceStoreReference | None = Field(
        default=None,
        description="Inference store configuration (uses SQL backend)",
    )
    conversations: SqlStoreReference | None = Field(
        default=None,
        description="Conversations store configuration (uses SQL backend)",
    )
    responses: ResponsesStoreReference | None = Field(
        default=None,
        description="Responses store configuration (uses SQL backend)",
    )


class StorageConfig(BaseModel):
    backends: dict[str, StorageBackendConfig] = Field(
        description="Named backend configurations (e.g., 'default', 'cache')",
    )
    stores: ServerStoresConfig = Field(
        default_factory=lambda: ServerStoresConfig(),
        description="Named references to storage backends used by the stack core",
    )
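A hedged sketch of how the discriminated union above behaves: the type field selects the concrete backend class when a raw dict (for example, one parsed from run.yaml) is validated. The backend names and paths are illustrative:

# Illustrative only: pydantic picks the backend class from the "type" discriminator.
from llama_stack.core.storage.datatypes import StorageConfig

raw = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "~/.llama/sql_store.db"},
    },
    "stores": {
        "metadata": {"backend": "kv_default", "namespace": "registry"},
        "inference": {"backend": "sql_default", "table_name": "inference_store"},
    },
}
cfg = StorageConfig.model_validate(raw)
print(type(cfg.backends["kv_default"]).__name__)  # SqliteKVStoreConfig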
@@ -11,10 +11,9 @@ from typing import Protocol
 import pydantic
 
 from llama_stack.core.datatypes import RoutableObjectWithProvider
-from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 
 logger = get_logger(__name__, category="core::registry")
 
@@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
 
 
 async def create_dist_registry(
-    metadata_store: KVStoreConfig | None,
-    image_name: str,
+    metadata_store: KVStoreReference, image_name: str
 ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
     # instantiate kvstore for storing and retrieving distribution metadata
-    if metadata_store:
     dist_kvstore = await kvstore_impl(metadata_store)
-    else:
-        dist_kvstore = await kvstore_impl(
-            SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
-        )
     dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
     await dist_registry.initialize()
     return dist_registry, dist_kvstore
@@ -42,25 +42,25 @@ def resolve_config_or_distro(
     # Strategy 1: Try as file path first
     config_path = Path(config_or_distro)
     if config_path.exists() and config_path.is_file():
-        logger.info(f"Using file path: {config_path}")
+        logger.debug(f"Using file path: {config_path}")
         return config_path.resolve()
 
     # Strategy 2: Try as distribution name (if no .yaml extension)
     if not config_or_distro.endswith(".yaml"):
         distro_config = _get_distro_config_path(config_or_distro, mode)
         if distro_config.exists():
-            logger.info(f"Using distribution: {distro_config}")
+            logger.debug(f"Using distribution: {distro_config}")
             return distro_config
 
     # Strategy 3: Try as built distribution name
     distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
-        logger.info(f"Using built distribution: {distrib_config}")
+        logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
     distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
-        logger.info(f"Using built distribution: {distrib_config}")
+        logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config
 
     # Strategy 4: Failed - provide helpful error
Some files were not shown because too many files have changed in this diff.