Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)

Commit 490b212576: Merge remote-tracking branch 'origin/main' into stores

89 changed files with 19353 additions and 8323 deletions
.dockerignore (new file, 19 lines)

.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.so
.git
.gitignore
htmlcov*
.coverage
coverage*
.cache
.mypy_cache
.pytest_cache
.ruff_cache
uv.lock
node_modules
build
/tmp
(another changed file; name not shown in this view)

@@ -57,7 +57,7 @@ runs:
         echo "Building Llama Stack"

         LLAMA_STACK_DIR=. \
-        uv run --no-sync llama stack build --template ci-tests --image-type venv
+        uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

     - name: Configure git for commits
       shell: bash
.github/workflows/README.md (1 changed line)

@@ -14,6 +14,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
+| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
.github/workflows/install-script-ci.yml (7 changed lines)

@@ -30,8 +30,11 @@ jobs:

       - name: Build a single provider
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
-            llama stack build --template starter --image-type container --image-name test
+          docker build . \
+            -f containers/Containerfile \
+            --build-arg INSTALL_MODE=editable \
+            --build-arg DISTRO_NAME=starter \
+            --tag llama-stack:starter-ci

       - name: Run installer end-to-end
         run: |
.github/workflows/integration-tests.yml (4 changed lines)

@@ -47,7 +47,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, server]
+        client-type: [library, server, docker]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -82,7 +82,7 @@
         env:
           OPENAI_API_KEY: dummy
         with:
-          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
+          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
           setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
           suite: ${{ matrix.config.suite }}
(another changed file; name not shown in this view)

@@ -144,7 +144,7 @@

       - name: Build Llama Stack
         run: |
-          uv run --no-sync llama stack build --template ci-tests --image-type venv
+          uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

       - name: Check Storage and Memory Available Before Tests
         if: ${{ always() }}
.github/workflows/providers-build.yml (57 changed lines)

@@ -14,6 +14,8 @@ on:
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/distributions/**'
       - 'pyproject.toml'
+      - 'containers/Containerfile'
+      - '.dockerignore'

   pull_request:
     paths:
@@ -24,6 +26,8 @@ on:
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/distributions/**'
       - 'pyproject.toml'
+      - 'containers/Containerfile'
+      - '.dockerignore'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -60,15 +64,19 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner

-      - name: Print build dependencies
+      - name: Install distribution into venv
+        if: matrix.image-type == 'venv'
         run: |
-          uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+          uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

-      - name: Run Llama Stack Build
+      - name: Build container image
+        if: matrix.image-type == 'container'
         run: |
-          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
-          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
+          docker build . \
+            -f containers/Containerfile \
+            --build-arg INSTALL_MODE=editable \
+            --build-arg DISTRO_NAME=${{ matrix.distro }} \
+            --tag llama-stack:${{ matrix.distro }}-ci

       - name: Print dependencies in the image
         if: matrix.image-type == 'venv'
@@ -86,8 +94,8 @@

       - name: Build a single provider
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama
+          uv pip install -e .
+          uv run --no-sync llama stack list-deps --providers inference=remote::ollama | xargs -L1 uv pip install

   build-custom-container-distribution:
     runs-on: ubuntu-latest
     steps:
@@ -97,11 +105,16 @@
       - name: Install dependencies
         uses: ./.github/actions/setup-runner

-      - name: Build a single provider
+      - name: Build container image
         run: |
-          yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
-          yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+          docker build . \
+            -f containers/Containerfile \
+            --build-arg INSTALL_MODE=editable \
+            --build-arg DISTRO_NAME=ci-tests \
+            --build-arg BASE_IMAGE="$BASE_IMAGE" \
+            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            -t llama-stack:ci-tests

       - name: Inspect the container image entrypoint
         run: |
@@ -112,7 +125,7 @@
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
@@ -129,17 +142,19 @@
       - name: Pin distribution to UBI9 base
         run: |
           yq -i '
-            .image_type = "container" |
-            .image_name = "ubi9-test" |
             .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
           ' llama_stack/distributions/ci-tests/build.yaml

-      - name: Build dev container (UBI9)
-        env:
-          USE_COPY_NOT_MOUNT: "true"
-          LLAMA_STACK_DIR: "."
+      - name: Build UBI9 container image
         run: |
-          uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+          docker build . \
+            -f containers/Containerfile \
+            --build-arg INSTALL_MODE=editable \
+            --build-arg DISTRO_NAME=ci-tests \
+            --build-arg BASE_IMAGE="$BASE_IMAGE" \
+            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            -t llama-stack:ci-tests-ubi9

       - name: Inspect UBI9 image
         run: |
@@ -150,7 +165,7 @@
           fi
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
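The container jobs above can be reproduced outside CI. The following is a sketch that assumes Docker and yq are available and that the commands run from the repository root; the image tag is illustrative rather than taken from the diff.

```bash
# Build the ci-tests image the same way the workflow does, then verify its entrypoint.
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=ci-tests \
  --build-arg BASE_IMAGE="$BASE_IMAGE" \
  --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
  -t llama-stack:ci-tests

# The workflow expects the entrypoint to be the wrapper script installed by containers/Containerfile.
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' llama-stack:ci-tests)
if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
  echo "Entrypoint is not correct"
  exit 1
fi
```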
.github/workflows/providers-list-deps.yml (new file, 105 lines)

name: Test llama stack list-deps

run-name: Test llama stack list-deps

on:
  push:
    branches:
      - main
    paths:
      - 'llama_stack/cli/stack/list_deps.py'
      - 'llama_stack/cli/stack/_list_deps.py'
      - 'llama_stack/core/build.*'
      - 'llama_stack/core/*.sh'
      - '.github/workflows/providers-list-deps.yml'
      - 'llama_stack/templates/**'
      - 'pyproject.toml'

  pull_request:
    paths:
      - 'llama_stack/cli/stack/list_deps.py'
      - 'llama_stack/cli/stack/_list_deps.py'
      - 'llama_stack/core/build.*'
      - 'llama_stack/core/*.sh'
      - '.github/workflows/providers-list-deps.yml'
      - 'llama_stack/templates/**'
      - 'pyproject.toml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      distros: ${{ steps.set-matrix.outputs.distros }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Generate Distribution List
        id: set-matrix
        run: |
          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
          echo "distros=$distros" >> "$GITHUB_OUTPUT"

  list-deps:
    needs: generate-matrix
    runs-on: ubuntu-latest
    strategy:
      matrix:
        distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
        image-type: [venv, container]
      fail-fast: false # We want to run all jobs even if some fail

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Print dependencies
        run: |
          uv run llama stack list-deps ${{ matrix.distro }}

      - name: Install Distro using llama stack list-deps
        run: |
          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

      - name: Print dependencies in the image
        if: matrix.image-type == 'venv'
        run: |
          uv pip list

  show-single-provider:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Show a single provider
        run: |
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps --providers inference=remote::ollama

  list-deps-from-config:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: list-deps from Config
        env:
          USE_COPY_NOT_MOUNT: "true"
          LLAMA_STACK_DIR: "."
        run: |
          uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
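The generate-matrix job above derives the distribution list purely from the repository layout. A minimal local sketch of the same pipeline; the names in the output depend on whatever directories exist under llama_stack/distributions/ in your checkout.

```bash
# Print the name of every distribution directory that ships a build.yaml,
# as a compact JSON array suitable for a GitHub Actions matrix.
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "$distros"   # e.g. ["ci-tests","starter"] for a checkout containing those two directories
```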
(another changed file; name not shown in this view)

@@ -46,9 +46,9 @@
           yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
           cat tests/external/ramalama-stack/run.yaml

-      - name: Build distro from config file
+      - name: Install distribution dependencies
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
+          uv run llama stack list-deps tests/external/ramalama-stack/build.yaml | xargs -L1 uv pip install

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
.github/workflows/test-external.yml (7 changed lines)

@@ -44,11 +44,14 @@ jobs:

       - name: Print distro dependencies
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
+          uv run --no-sync llama stack list-deps tests/external/build.yaml

       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
+          uv venv ci-test
+          source ci-test/bin/activate
+          uv pip install -e .
+          LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
(another changed file; name not shown in this view)

@@ -167,9 +167,9 @@ under the LICENSE file in the root directory of this source tree.

 Some tips about common tasks you work on while contributing to Llama Stack:

-### Using `llama stack build`
+### Installing dependencies of distributions

-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
+When installing dependencies for a distribution, you can use `llama stack list-deps` to view and install the required packages.

 Example:
 ```bash
@@ -177,7 +177,12 @@ cd work/
 git clone https://github.com/llamastack/llama-stack.git
 git clone https://github.com/llamastack/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+
+# Show dependencies for a distribution
+llama stack list-deps <distro-name>
+
+# Install dependencies
+llama stack list-deps <distro-name> | xargs -L1 uv pip install
 ```

 ### Updating distribution configurations
(another changed file; name not shown in this view)

@@ -27,8 +27,11 @@ MODEL="Llama-4-Scout-17B-16E-Instruct"
 # get meta url from llama.com
 huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

+# install dependencies for the distribution
+llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
+
 # start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
+INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu

 # install client to interact with the server
 pip install llama-stack-client
@@ -89,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
 To try Llama Stack locally, run:

 ```bash
-curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
+curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
 ```

 ### Overview
containers/Containerfile (new file, 136 lines)

# syntax=docker/dockerfile:1.6
#
# This Dockerfile is used to build the Llama Stack container image.
# Example:
#   docker build \
#     -f containers/Containerfile \
#     --build-arg DISTRO_NAME=starter \
#     --tag llama-stack:starter .

ARG BASE_IMAGE=python:3.12-slim
FROM ${BASE_IMAGE}

ARG INSTALL_MODE="pypi"
ARG LLAMA_STACK_DIR="/workspace"
ARG LLAMA_STACK_CLIENT_DIR=""
ARG PYPI_VERSION=""
ARG TEST_PYPI_VERSION=""
ARG KEEP_WORKSPACE=""
ARG DISTRO_NAME="starter"
ARG RUN_CONFIG_PATH=""

ARG UV_HTTP_TIMEOUT=500
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}

ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app

RUN set -eux; \
    if command -v dnf >/dev/null 2>&1; then \
        dnf -y update && \
        dnf install -y iputils git net-tools wget \
            vim-minimal python3.12 python3.12-pip python3.12-wheel \
            python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
        ln -sf /usr/bin/pip3.12 /usr/local/bin/pip && \
        ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
        dnf clean all; \
    elif command -v apt-get >/dev/null 2>&1; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            iputils-ping net-tools iproute2 dnsutils telnet \
            curl wget git procps psmisc lsof traceroute bubblewrap \
            gcc g++ && \
        rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Unsupported base image: expected dnf or apt-get" >&2; \
        exit 1; \
    fi

RUN pip install --no-cache-dir uv
ENV UV_SYSTEM_PYTHON=1

ENV INSTALL_MODE=${INSTALL_MODE}
ENV LLAMA_STACK_DIR=${LLAMA_STACK_DIR}
ENV LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR}
ENV PYPI_VERSION=${PYPI_VERSION}
ENV TEST_PYPI_VERSION=${TEST_PYPI_VERSION}
ENV KEEP_WORKSPACE=${KEEP_WORKSPACE}
ENV DISTRO_NAME=${DISTRO_NAME}
ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}

# Copy the repository so editable installs and run configurations are available.
COPY . /workspace

# Install llama-stack
RUN set -eux; \
    if [ "$INSTALL_MODE" = "editable" ]; then \
        if [ ! -d "$LLAMA_STACK_DIR" ]; then \
            echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
    elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
        uv pip install --no-cache-dir fastapi libcst; \
        if [ -n "$TEST_PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
        fi; \
    else \
        if [ -n "$PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir llama-stack; \
        fi; \
    fi;

# Install the client package if it is provided
RUN set -eux; \
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
    fi;

# Install the dependencies for the distribution
RUN set -eux; \
    if [ -z "$DISTRO_NAME" ]; then \
        echo "DISTRO_NAME must be provided" >&2; \
        exit 1; \
    fi; \
    deps="$(llama stack list-deps "$DISTRO_NAME")"; \
    if [ -n "$deps" ]; then \
        printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
    fi

# Cleanup
RUN set -eux; \
    pip uninstall -y uv; \
    should_remove=1; \
    if [ -n "$KEEP_WORKSPACE" ]; then should_remove=0; fi; \
    if [ "$INSTALL_MODE" = "editable" ]; then should_remove=0; fi; \
    case "$RUN_CONFIG_PATH" in \
        /workspace*) should_remove=0 ;; \
    esac; \
    if [ "$should_remove" -eq 1 ] && [ -d /workspace ]; then rm -rf /workspace; fi

RUN cat <<'EOF' >/usr/local/bin/llama-stack-entrypoint.sh
#!/bin/sh
set -e

if [ -n "$RUN_CONFIG_PATH" ] && [ -f "$RUN_CONFIG_PATH" ]; then
  exec llama stack run "$RUN_CONFIG_PATH" "$@"
fi

if [ -n "$DISTRO_NAME" ]; then
  exec llama stack run "$DISTRO_NAME" "$@"
fi

exec llama stack run "$@"
EOF
RUN chmod +x /usr/local/bin/llama-stack-entrypoint.sh

RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache

ENTRYPOINT ["/usr/local/bin/llama-stack-entrypoint.sh"]
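A minimal sketch of building and running an image from this Containerfile, assuming the commands run from the repository root; the tag and port are illustrative.

```bash
# Build the starter distribution image.
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter

# The entrypoint script resolves to `llama stack run starter` when no run config is baked in.
docker run -p 8321:8321 llama-stack:starter --port 8321
```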
(another changed file; name not shown in this view)

@@ -51,8 +51,8 @@ device: cpu
 You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
-llama stack build --distro starter --image-type venv
-llama stack run ~/.llama/distributions/starter/starter-run.yaml
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### Usage Example
(another changed file; name not shown in this view)

@@ -175,8 +175,7 @@ llama-stack-client benchmarks register \
 **1. Start the Llama Stack API Server**

 ```bash
-# Build and run a distribution (example: together)
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

@@ -209,7 +208,7 @@ The playground works with any Llama Stack distribution. Popular options include:
 <TabItem value="together" label="Together AI">

 ```bash
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

@@ -222,7 +221,7 @@ llama stack run together
 <TabItem value="ollama" label="Ollama (Local)">

 ```bash
-llama stack build --distro ollama --image-type venv
+llama stack list-deps ollama | xargs -L1 uv pip install
 llama stack run ollama
 ```

@@ -235,7 +234,7 @@ llama stack run ollama
 <TabItem value="meta-reference" label="Meta Reference">

 ```bash
-llama stack build --distro meta-reference --image-type venv
+llama stack list-deps meta-reference | xargs -L1 uv pip install
 llama stack run meta-reference
 ```
(another changed file; name not shown in this view)

@@ -20,7 +20,8 @@ RAG enables your applications to reference and recall information from external
 In one terminal, start the Llama Stack server:

 ```bash
-uv run llama stack build --distro starter --image-type venv --run
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### 2. Connect with OpenAI Client
(another changed file; name not shown in this view)

@@ -62,6 +62,10 @@ The new `/v2` API must be introduced alongside the existing `/v1` API and run in

 When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.

+### Deprecated APIs
+
+Deprecated APIs are those that are no longer actively maintained or supported. Deprecated APIs are marked with the flag `deprecated = True` in the OpenAPI spec. These APIs will be removed in a future release.
+
 ### API Stability vs. Provider Stability

 The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.
(another changed file; name not shown in this view)

@@ -158,17 +158,16 @@ under the LICENSE file in the root directory of this source tree.

 Some tips about common tasks you work on while contributing to Llama Stack:

-### Using `llama stack build`
+### Setup for development

-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
-
-Example:
 ```bash
-cd work/
 git clone https://github.com/meta-llama/llama-stack.git
-git clone https://github.com/meta-llama/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+uv run llama stack list-deps <distro-name> | xargs -L1 uv pip install
+
+# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package.
+git clone https://github.com/meta-llama/llama-stack-client-python.git
+uv add --editable ../llama-stack-client-python
 ```

 ### Updating distribution configurations
(another changed file; name not shown in this view)

@@ -67,7 +67,7 @@ def get_base_url(self) -> str:

 ## Testing the Provider

-Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
+Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.

 ### 1. Integration Testing
(docs page: Build your own Distribution)

@@ -5,225 +5,79 @@ sidebar_label: Build your own Distribution
 sidebar_position: 3
 ---

-This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
+This guide walks you through inspecting existing distributions, customising their configuration, and building runnable artefacts for your own deployment.

-### Setting your log level
-
-In order to specify the proper logging level users can apply the following environment variable `LLAMA_STACK_LOGGING` with the following format:
-
-`LLAMA_STACK_LOGGING=server=debug;core=info`
-
-Where each category in the following list:
-
-- all
-- core
-- server
-- router
-- inference
-- agents
-- safety
-- eval
-- tools
-- client
-
-Can be set to any of the following log levels:
-
-- debug
-- info
-- warning
-- error
-- critical
-
-The default global log level is `info`. `all` sets the log level for all components.
-
-A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
-
-### Llama Stack Build
-
-In order to build your own distribution, we recommend you clone the `llama-stack` repository.
-
-```
-git clone git@github.com:meta-llama/llama-stack.git
-cd llama-stack
-pip install -e .
-```
-Use the CLI to build your distribution.
-The main points to consider are:
-1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
-2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
-3. **Config** - Do you want to use a pre-existing config file to build your distribution?
-
-```
-llama stack build -h
-usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only] [--run] [--providers PROVIDERS]
-
-Build a Llama stack container
-
-options:
-  -h, --help            show this help message and exit
-  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively (default: None)
-  --template TEMPLATE   (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
-  --distro DISTRIBUTION, --distribution DISTRIBUTION
-                        Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
-  --list-distros, --list-distributions
-                        Show the available distributions for building a Llama Stack distribution (default: False)
-  --image-type {container,venv}
-                        Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
-  --image-name IMAGE_NAME
-                        [for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default: None)
-  --print-deps-only     Print the dependencies for the stack only, without building the stack (default: False)
-  --run                 Run the stack after building using the same image type, name, and other applicable arguments (default: False)
-  --providers PROVIDERS
-                        Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API. (default: None)
-```
-
-After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
-
+### Explore existing distributions
+
+All first-party distributions live under `llama_stack/distributions/`. Each directory contains:
+
+- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
+- `run.yaml` – sample run configuration (when provided).
+- Documentation fragments that power this site.
+
+Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
+
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

 <Tabs>
-<TabItem value="template" label="Building from a template">
-To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
-
-The following command will allow you to see the available templates and their corresponding providers.
-```
-llama stack build --list-templates
-```
-
-```
-| Template Name                | Description                                                                  |
-| watsonx                      | Use watsonx for running LLM inference                                        |
-| vllm-gpu                     | Use a built-in vLLM engine for running LLM inference                         |
-| together                     | Use Together.AI for running LLM inference                                    |
-| tgi                          | Use (an external) TGI server for running LLM inference                       |
-| starter                      | Quick start template for running Llama Stack with several popular providers  |
-| sambanova                    | Use SambaNova for running LLM inference and safety                           |
-| remote-vllm                  | Use (an external) vLLM server for running LLM inference                      |
-| postgres-demo                | Quick start template for running Llama Stack with several popular providers  |
-| passthrough                  | Use Passthrough hosted llama-stack endpoint for LLM inference                |
-| open-benchmark               | Distribution for running open benchmarks                                     |
-| ollama                       | Use (an external) Ollama server for running LLM inference                    |
-| nvidia                       | Use NVIDIA NIM for running LLM inference, evaluation and safety              |
-| meta-reference-gpu           | Use Meta Reference for running LLM inference                                 |
-| llama_api                    | Distribution for running e2e tests in CI                                     |
-| hf-serverless                | Use (an external) Hugging Face Inference Endpoint for running LLM inference  |
-| hf-endpoint                  | Use (an external) Hugging Face Inference Endpoint for running LLM inference  |
-| groq                         | Use Groq for running LLM inference                                           |
-| fireworks                    | Use Fireworks.AI for running LLM inference                                   |
-| experimental-post-training   | Experimental template for post training                                      |
-| dell                         | Dell's distribution of Llama Stack. TGI inference via Dell's custom container |
-| ci-tests                     | Distribution for running e2e tests in CI                                     |
-| cerebras                     | Use Cerebras for running LLM inference                                       |
-| bedrock                      | Use AWS Bedrock for running LLM inference and safety                         |
-```
-
-You may then pick a template to build your distribution with providers fitted to your liking.
-
-For example, to build a distribution with TGI as the inference provider, you can run:
-```
-$ llama stack build --distro starter
-...
-You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
-```
-
-```{tip}
-The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
-```
+<TabItem value="container" label="Building a container">
+
+Use the Containerfile at `containers/Containerfile`, which installs `llama-stack`, resolves distribution dependencies via `llama stack list-deps`, and sets the entrypoint to `llama stack run`.
+
+```bash
+docker build . \
+  -f containers/Containerfile \
+  --build-arg DISTRO_NAME=starter \
+  --tag llama-stack:starter
+```
+
+Handy build arguments:
+
+- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
+- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
+- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
+- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
+- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
+- `KEEP_WORKSPACE=1` – retain `/workspace` in the final image if you need to access additional files (such as sample configs or provider bundles).
+
+Make sure any custom `build.yaml`, run configs, or provider directories you reference are included in the Docker build context so the Containerfile can read them.
 </TabItem>
-<TabItem value="scratch" label="Building from Scratch">
-
-If the provided templates do not fit your use case, you could start off with running `llama stack build` which will allow you to a interactively enter wizard where you will be prompted to enter build configurations.
-
-It would be best to start with a template and understand the structure of the config file and the various concepts ( APIS, providers, resources, etc.) before starting from scratch.
-```
-llama stack build
-
-> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
-> Enter the image type you want your Llama Stack to be built as (container or venv): venv
-
-Llama Stack is composed of several APIs working together. Let's select
-the provider types (implementations) you want to use for these APIs.
-
-Tip: use <TAB> to see options for the providers.
-
-> Enter provider for API inference: inline::meta-reference
-> Enter provider for API safety: inline::llama-guard
-> Enter provider for API agents: inline::meta-reference
-> Enter provider for API memory: inline::faiss
-> Enter provider for API datasetio: inline::meta-reference
-> Enter provider for API scoring: inline::meta-reference
-> Enter provider for API eval: inline::meta-reference
-> Enter provider for API telemetry: inline::meta-reference
-
-> (Optional) Enter a short description for your Llama Stack:
-
-You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
-```
-</TabItem>
-<TabItem value="config" label="Building from a pre-existing build config file">
-- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
-
-- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
-
-```
-llama stack build --config llama_stack/distributions/starter/build.yaml
-```
-</TabItem>
-<TabItem value="external" label="Building with External Providers">
-
-Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
-
-To build a distribution with external providers, you need to:
-
-1. Configure the `external_providers_dir` in your build configuration file:
+<TabItem value="external" label="Building with external providers">
+
+External providers live outside the main repository but can be bundled by pointing `external_providers_dir` to a directory that contains your provider packages.
+
+1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
+2. Update `build.yaml` with the directory and provider entries.
+3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
+
+Example `build.yaml` excerpt for a custom Ollama provider:

 ```yaml
-# Example my-external-stack.yaml with external providers
-version: '2'
 distribution_spec:
-  description: Custom distro for CI tests
   providers:
     inference:
       - remote::custom_ollama
-# Add more providers as needed
-image_type: container
-image_name: ci-test
-# Path to external provider implementations
-external_providers_dir: ~/.llama/providers.d
+external_providers_dir: /workspace/providers.d
 ```
+
+Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:
+
+```python
+from llama_stack.providers.datatypes import ProviderSpec
+
+
+def get_provider_spec() -> ProviderSpec:
+    return ProviderSpec(
+        provider_type="remote::custom_ollama",
+        module="llama_stack_ollama_provider",
+        config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
+        pip_packages=[
+            "ollama",
+            "aiohttp",
+            "llama-stack-provider-ollama",
+        ],
+    )
+```

 Here's an example for a custom Ollama provider:
@ -245,53 +99,22 @@ The `pip_packages` section lists the Python packages required by the provider, a
|
||||||
provider package itself. The package must be available on PyPI or can be provided from a local
|
provider package itself. The package must be available on PyPI or can be provided from a local
|
||||||
directory or a git repository (git must be installed on the build environment).
|
directory or a git repository (git must be installed on the build environment).
|
||||||
|
|
||||||
2. Build your distribution using the config file:
|
For deeper guidance, see the [External Providers documentation](../providers/external/).
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --config my-external-stack.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external/).
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="container" label="Building Container">
|
</Tabs>
|
||||||
|
|
||||||
:::tip Podman Alternative
|
### Run your stack server
|
||||||
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
|
|
||||||
:::
|
|
||||||
|
|
||||||
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
After building the image, launch it directly with Docker or Podman—the entrypoint calls `llama stack run` using the baked distribution or the bundled run config:
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --distro starter --image-type container
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
$ llama stack build --distro starter --image-type container
|
|
||||||
...
|
|
||||||
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
|
|
||||||
```
|
|
||||||
|
|
||||||
Now set some environment variables for the inference model ID and Llama Stack Port and create a local directory to mount into the container's file system.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export INFERENCE_MODEL="llama3.2:3b"
|
|
||||||
export LLAMA_STACK_PORT=8321
|
|
||||||
mkdir -p ~/.llama
|
|
||||||
```
|
|
||||||
|
|
||||||
After this step is successful, you should be able to find the built container image and test it with the below Docker command:
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run -d \
|
docker run -d \
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
-v ~/.llama:/root/.llama \
|
-v ~/.llama:/root/.llama \
|
||||||
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
-e INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||||
-e OLLAMA_URL=http://host.docker.internal:11434 \
|
-e OLLAMA_URL=http://host.docker.internal:11434 \
|
||||||
localhost/distribution-ollama:dev \
|
llama-stack:starter \
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -311,131 +134,14 @@ Here are the docker flags and their uses:
|
||||||
|
|
||||||
* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
|
* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
### Running your Stack server
|
If you prepared a custom run config, mount it into the container and reference it explicitly:
|
||||||
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack build` step.
|
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run \
|
||||||
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
|
-v $(pwd)/run.yaml:/app/run.yaml \
|
||||||
|
llama-stack:starter \
|
||||||
|
/app/run.yaml
|
||||||
```
|

```
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                       [--image-type {venv}] [--enable-ui]
                       [config | distro]

Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

positional arguments:
  config | distro       Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

options:
  -h, --help            show this help message and exit
  --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
  --image-name IMAGE_NAME
                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
  --image-type {venv}
                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
  --enable-ui           Start the UI server (default: False)
```
**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands, as shown in the container-building section above.

```
# Start using template name
llama stack run tgi

# Start using config file
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```

```
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

Serving API inspect
 GET /health
 GET /providers/list
 GET /routes/list
Serving API inference
 POST /inference/chat_completion
 POST /inference/completion
 POST /inference/embeddings
...
Serving API agents
 POST /agents/create
 POST /agents/session/create
 POST /agents/turn/create
 POST /agents/delete
 POST /agents/session/delete
 POST /agents/session/get
 POST /agents/step/get
 POST /agents/turn/get

Listening on ['::', '0.0.0.0']:8321
INFO:     Started server process [2935911]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO:     2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```
### Listing Distributions

Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.

```
llama stack list -h
usage: llama stack list [-h]

list the build stacks

options:
  -h, --help  show this help message and exit
```

Example Usage

```
llama stack list
```

```
+-------------+-------------------------------------+--------------+------------+
| Stack Name  | Path                                | Build Config | Run Config |
+-------------+-------------------------------------+--------------+------------+
| together    | ~/.llama/distributions/together     | Yes          | No         |
+-------------+-------------------------------------+--------------+------------+
| bedrock     | ~/.llama/distributions/bedrock      | Yes          | No         |
+-------------+-------------------------------------+--------------+------------+
| starter     | ~/.llama/distributions/starter      | Yes          | Yes        |
+-------------+-------------------------------------+--------------+------------+
| remote-vllm | ~/.llama/distributions/remote-vllm  | Yes          | Yes        |
+-------------+-------------------------------------+--------------+------------+
```
### Removing a Distribution

Use the remove command to delete a distribution you've previously built.

```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]

Remove the build stack

positional arguments:
  name        Name of the stack to delete (default: None)

options:
  -h, --help  show this help message and exit
  --all, -a   Delete all stacks (use with caution) (default: False)
```

Example

```
llama stack rm llamastack-test
```

To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.
### Troubleshooting

If you encounter any issues, ask questions in our Discord, search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file a new issue.

@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.

```bash
# setup
uv pip install llama-stack
llama stack list-deps starter | xargs -L1 uv pip install
```

```python
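# NOTE: the Python example that followed here in the original document is
# truncated by this diff. The lines below are a hedged sketch of library-mode
# usage; the import path and method names are assumptions based on the
# llama-stack client docs, not taken from this diff.
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")  # distro name assumed
client.initialize()

# List the models the in-process stack knows about (no server needed).
for model in client.models.list():
    print(model.identifier)
```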
@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th

uv venv starter --python 3.12
source starter/bin/activate  # On Windows: starter\Scripts\activate
pip install --no-cache llama-stack==0.2.2
llama stack list-deps starter | xargs -L1 uv pip install
export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run starter --port 5050
```
@ -166,10 +166,10 @@ docker run \

### Via venv

Install the distribution dependencies before launching:

```bash
llama stack list-deps dell | xargs -L1 uv pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \
@ -81,10 +81,10 @@ docker run \

### Via venv

Make sure you have the Llama Stack CLI available.

```bash
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/meta-reference-gpu/run.yaml \
  --port 8321
@ -136,11 +136,11 @@ docker run \

### Via venv

If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
@ -169,7 +169,11 @@ docker run \

Ensure you have configured the starter distribution using the environment variables explained above.

```bash
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
uv run --with llama-stack llama stack run starter
```

## Example Usage
@ -23,6 +23,17 @@ Another simple way to start interacting with Llama Stack is to just spin up a co

If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.

## Configure logging
|
||||||
|
|
||||||
|
Control log output via environment variables before starting the server.
|
||||||
|
|
||||||
|
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
|
||||||
|
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
|
||||||
|
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
|
||||||
|
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
|
||||||
|
|
||||||
|
Export these variables prior to running `llama stack run`, launching a container, or starting the server through any other pathway.
|
||||||
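For example, a debug session might export the variables like this before starting the server (the level and path values here are illustrative):

```bash
# Illustrative values; any of the documented categories and levels can be used.
export LLAMA_STACK_LOGGING="server=debug;core=info"
export LLAMA_STACK_LOG_FILE=/tmp/llama_stack_server.log

llama stack run starter
```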

```{toctree}
:maxdepth: 1
:hidden:
@ -58,15 +58,19 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th

<Tabs>
<TabItem value="venv" label="Using venv">
You can use Python to install dependencies and run the Llama Stack server, which is useful for testing and development.

Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
Now let's install dependencies and run the Llama Stack config for Ollama.
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.

```bash
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
llama stack run starter
```
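Since the starter template ships with providers disabled, point the stack at your Ollama instance when you start it; the URL below is the default local Ollama endpoint and is shown as an example:

```bash
# Example: enable the Ollama provider for this run (default local endpoint).
OLLAMA_URL=http://localhost:11434 llama stack run starter
```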
</TabItem>
<TabItem value="container" label="Using a Container">
@ -304,7 +308,7 @@ stream = agent.create_turn(

for event in AgentEventLogger().log(stream):
    event.print()
```
#### ii. Run the Script
Let's run the script using `uv`
```bash
uv run python agent.py
@ -24,10 +24,13 @@ ollama run llama3.2:3b --keepalive 60m

#### Step 2: Run the Llama Stack server

We will use `uv` to install dependencies and run the Llama Stack server.
```bash
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
```
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
@ -1,5 +1,7 @@

---
description: "Evaluations

  Llama Stack Evaluation API for running evaluations on model and agent candidates."
sidebar_label: Eval
title: Eval
---
@ -8,6 +10,8 @@ title: Eval

## Overview

Evaluations

Llama Stack Evaluation API for running evaluations on model and agent candidates.

This section contains documentation for all available providers for the **eval** API.
@ -240,6 +240,6 @@ additional_pip_packages:

- sqlalchemy[asyncio]
```

No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.

The provider will now be available in Llama Stack with the type `remote::ramalama`.
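As a concrete illustration of that workflow (the distribution name below is a placeholder, not something defined in this document), the end-to-end flow for a build config that pulls in the external provider is just install-then-run:

```bash
# "my-ramalama-stack" is a hypothetical distro/build-config name; substitute your own.
llama stack list-deps my-ramalama-stack | xargs -L1 uv pip install
llama stack run my-ramalama-stack
```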
@ -123,7 +123,8 @@
"    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
"!uv run --with llama-stack llama stack list-deps together | xargs -L1 uv pip install\n",
"!uv run --with llama-stack llama stack run together\n",
"\n",
"def run_llama_stack_server_background():\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -233,7 +233,8 @@
"    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server\n",
"!uv run --with llama-stack llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install\n",
"!uv run --with llama-stack llama stack run meta-reference-gpu\n",
"\n",
"def run_llama_stack_server_background():\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -223,7 +223,8 @@
"    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server\n",
"!uv run --with llama-stack llama stack list-deps llama_api | xargs -L1 uv pip install\n",
"!uv run --with llama-stack llama stack run llama_api\n",
"\n",
"def run_llama_stack_server_background():\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -2864,7 +2864,7 @@
}
],
"source": [
"!llama stack list-deps experimental-post-training | xargs -L1 uv pip install"
]
},
{
@ -38,7 +38,7 @@
"source": [
"# NBVAL_SKIP\n",
"!pip install -U llama-stack\n",
"llama stack list-deps fireworks | xargs -L1 uv pip install\n"
]
},
{
@ -57,7 +57,7 @@
"outputs": [],
"source": [
"# NBVAL_SKIP\n",
"!uv run llama stack list-deps together | xargs -L1 uv pip install\n"
]
},
{
@ -136,7 +136,8 @@
"    \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
"    process = subprocess.Popen(\n",
"        \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
"        \"uv run --with llama-stack llama stack run starter\",\n",
"        shell=True,\n",
"        stdout=log_file,\n",
"        stderr=log_file,\n",
@ -172,7 +173,7 @@
"\n",
"def kill_llama_stack_server():\n",
"    # Kill any existing llama stack server processes using pkill command\n",
"    os.system(\"pkill -f llama_stack.core.server.server\")\n"
]
},
{
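The notebook hunks above start the server with `subprocess.Popen` and later kill it with `pkill`; between the two, a readiness check like the following is often handy. It is not part of the diff, and the health route is an assumption based on the inspect API routes listed earlier in this document:

```bash
# Illustrative only: block until the background server answers on its health endpoint.
until curl -sf http://localhost:8321/v1/health > /dev/null; do
  sleep 1
done
echo "Llama Stack server is up"
```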
@ -105,7 +105,8 @@
"    \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
"    process = subprocess.Popen(\n",
"        \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
"        \"uv run --with llama-stack llama stack run starter\",\n",
"        shell=True,\n",
"        stdout=log_file,\n",
"        stderr=log_file,\n",
@ -92,7 +92,7 @@
"metadata": {},
"source": [
"```bash\n",
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
"```"
]
},
@ -81,7 +81,7 @@
"metadata": {},
"source": [
"```bash\n",
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
"```"
]
},
@ -145,7 +145,7 @@
"    del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\n",
"\n",
"def run_llama_stack_server_background():\n",
"    log_file = open(\"llama_stack_server.log\", \"w\")\n",
@ -47,11 +47,11 @@ function QuickStart() {
<pre><code>{`# Install uv and start Ollama
ollama run llama3.2:3b --keepalive 60m

# Install server dependencies
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run Llama Stack server
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter

# Try the Python SDK
from llama_stack_client import LlamaStackClient
4
docs/static/deprecated-llama-stack-spec.html
vendored
@ -13449,8 +13449,8 @@
},
{
"name": "Eval",
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Evaluations"
},
{
"name": "Files",
4
docs/static/deprecated-llama-stack-spec.yaml
vendored
@ -10196,9 +10196,9 @@ tags:
- name: Datasets
  description: ''
- name: Eval
  description: >-
    Llama Stack Evaluation API for running evaluations on model and agent candidates.
  x-displayName: Evaluations
- name: Files
  description: >-
    This API is used to upload documents that can be used with other Llama Stack
@ -5518,8 +5518,8 @@
},
{
"name": "Eval",
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Evaluations"
},
{
"name": "PostTraining (Coming Soon)",
@ -4119,9 +4119,9 @@ tags:
- name: Datasets
  description: ''
- name: Eval
  description: >-
    Llama Stack Evaluation API for running evaluations on model and agent candidates.
  x-displayName: Evaluations
- name: PostTraining (Coming Soon)
  description: ''
x-tagGroups:
34
docs/static/llama-stack-spec.html
vendored
@ -282,7 +282,7 @@
"Conversations"
],
"summary": "Create a conversation.",
"description": "Create a conversation.\nCreate a conversation.",
"parameters": [],
"requestBody": {
"content": {
@ -326,8 +326,8 @@
"tags": [
"Conversations"
],
"summary": "Retrieve a conversation.",
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -369,8 +369,8 @@
"tags": [
"Conversations"
],
"summary": "Update a conversation.",
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -422,8 +422,8 @@
"tags": [
"Conversations"
],
"summary": "Delete a conversation.",
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -467,8 +467,8 @@
"tags": [
"Conversations"
],
"summary": "List items.",
"description": "List items.\nList items in the conversation.",
"parameters": [
{
"name": "conversation_id",
@ -597,8 +597,8 @@
"tags": [
"Conversations"
],
"summary": "Create items.",
"description": "Create items.\nCreate items in the conversation.",
"parameters": [
{
"name": "conversation_id",
@ -652,8 +652,8 @@
"tags": [
"Conversations"
],
"summary": "Retrieve an item.",
"description": "Retrieve an item.\nRetrieve a conversation item.",
"parameters": [
{
"name": "conversation_id",
@ -704,8 +704,8 @@
"tags": [
"Conversations"
],
"summary": "Delete an item.",
"description": "Delete an item.\nDelete a conversation item.",
"parameters": [
{
"name": "conversation_id",
@ -13251,8 +13251,8 @@
},
{
"name": "Conversations",
"description": "Protocol for conversation management operations.",
"x-displayName": "Conversations"
},
{
"name": "Files",
56
docs/static/llama-stack-spec.yaml
vendored
@ -192,7 +192,10 @@ paths:
      tags:
        - Conversations
      summary: Create a conversation.
      description: >-
        Create a conversation.

        Create a conversation.
      parameters: []
      requestBody:
        content:
@ -222,8 +225,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Retrieve a conversation.
      description: >-
        Retrieve a conversation.

        Get a conversation with the given ID.
      parameters:
        - name: conversation_id
          in: path
@ -252,9 +258,10 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Update a conversation.
      description: >-
        Update a conversation.

        Update a conversation's metadata with the given ID.
      parameters:
        - name: conversation_id
@ -290,8 +297,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Delete a conversation.
      description: >-
        Delete a conversation.

        Delete a conversation with the given ID.
      parameters:
        - name: conversation_id
          in: path
@ -321,8 +331,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: List items.
      description: >-
        List items.

        List items in the conversation.
      parameters:
        - name: conversation_id
          in: path
@ -495,8 +508,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Create items.
      description: >-
        Create items.

        Create items in the conversation.
      parameters:
        - name: conversation_id
          in: path
@ -532,8 +548,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Retrieve an item.
      description: >-
        Retrieve an item.

        Retrieve a conversation item.
      parameters:
        - name: conversation_id
          in: path
@ -568,8 +587,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Delete an item.
      description: >-
        Delete an item.

        Delete a conversation item.
      parameters:
        - name: conversation_id
          in: path
@ -10146,9 +10168,9 @@ tags:
    - `background`
  x-displayName: Agents
- name: Conversations
  description: >-
    Protocol for conversation management operations.
  x-displayName: Conversations
- name: Files
  description: >-
    This API is used to upload documents that can be used with other Llama Stack
38
docs/static/stainless-llama-stack-spec.html
vendored
@ -282,7 +282,7 @@
"Conversations"
],
"summary": "Create a conversation.",
"description": "Create a conversation.\nCreate a conversation.",
"parameters": [],
"requestBody": {
"content": {
@ -326,8 +326,8 @@
"tags": [
"Conversations"
],
"summary": "Retrieve a conversation.",
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -369,8 +369,8 @@
"tags": [
"Conversations"
],
"summary": "Update a conversation.",
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -422,8 +422,8 @@
"tags": [
"Conversations"
],
"summary": "Delete a conversation.",
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
"parameters": [
{
"name": "conversation_id",
@ -467,8 +467,8 @@
"tags": [
"Conversations"
],
"summary": "List items.",
"description": "List items.\nList items in the conversation.",
"parameters": [
{
"name": "conversation_id",
@ -597,8 +597,8 @@
"tags": [
"Conversations"
],
"summary": "Create items.",
"description": "Create items.\nCreate items in the conversation.",
"parameters": [
{
"name": "conversation_id",
@ -652,8 +652,8 @@
"tags": [
"Conversations"
],
"summary": "Retrieve an item.",
"description": "Retrieve an item.\nRetrieve a conversation item.",
"parameters": [
{
"name": "conversation_id",
@ -704,8 +704,8 @@
"tags": [
"Conversations"
],
"summary": "Delete an item.",
"description": "Delete an item.\nDelete a conversation item.",
"parameters": [
{
"name": "conversation_id",
@ -17928,8 +17928,8 @@
},
{
"name": "Conversations",
"description": "Protocol for conversation management operations.",
"x-displayName": "Conversations"
},
{
"name": "DatasetIO",
@ -17941,8 +17941,8 @@
},
{
"name": "Eval",
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Evaluations"
},
{
"name": "Files",
60
docs/static/stainless-llama-stack-spec.yaml
vendored
@ -195,7 +195,10 @@ paths:
      tags:
        - Conversations
      summary: Create a conversation.
      description: >-
        Create a conversation.

        Create a conversation.
      parameters: []
      requestBody:
        content:
@ -225,8 +228,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Retrieve a conversation.
      description: >-
        Retrieve a conversation.

        Get a conversation with the given ID.
      parameters:
        - name: conversation_id
          in: path
@ -255,9 +261,10 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Update a conversation.
      description: >-
        Update a conversation.

        Update a conversation's metadata with the given ID.
      parameters:
        - name: conversation_id
@ -293,8 +300,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Delete a conversation.
      description: >-
        Delete a conversation.

        Delete a conversation with the given ID.
      parameters:
        - name: conversation_id
          in: path
@ -324,8 +334,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: List items.
      description: >-
        List items.

        List items in the conversation.
      parameters:
        - name: conversation_id
          in: path
@ -498,8 +511,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Create items.
      description: >-
        Create items.

        Create items in the conversation.
      parameters:
        - name: conversation_id
          in: path
@ -535,8 +551,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Retrieve an item.
      description: >-
        Retrieve an item.

        Retrieve a conversation item.
      parameters:
        - name: conversation_id
          in: path
@ -571,8 +590,11 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Conversations
      summary: Delete an item.
      description: >-
        Delete an item.

        Delete a conversation item.
      parameters:
        - name: conversation_id
          in: path
@ -13533,17 +13555,17 @@ tags:
- name: Benchmarks
  description: ''
- name: Conversations
  description: >-
    Protocol for conversation management operations.
  x-displayName: Conversations
- name: DatasetIO
  description: ''
- name: Datasets
  description: ''
- name: Eval
  description: >-
    Llama Stack Evaluation API for running evaluations on model and agent candidates.
  x-displayName: Evaluations
- name: Files
  description: >-
    This API is used to upload documents that can be used with other Llama Stack
@ -78,17 +78,14 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next

## Build, Configure, and Run Llama Stack

1. **Install dependencies**:
   ```bash
   llama stack list-deps starter | xargs -L1 uv pip install
   ```

2. **Start the distribution**:
   ```bash
   llama stack run starter
   ```

3. **Set the ENV variables by exporting them to the terminal**:
@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
@runtime_checkable
@trace_protocol
class Conversations(Protocol):
    """Conversations

    Protocol for conversation management operations."""

    @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
    async def create_conversation(
@ -181,6 +183,8 @@ class Conversations(Protocol):
    ) -> Conversation:
        """Create a conversation.

        Create a conversation.

        :param items: Initial items to include in the conversation context.
        :param metadata: Set of key-value pairs that can be attached to an object.
        :returns: The created conversation object.
@ -189,7 +193,9 @@ class Conversations(Protocol):
    @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_conversation(self, conversation_id: str) -> Conversation:
        """Retrieve a conversation.

        Get a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The conversation object.
@ -198,7 +204,9 @@ class Conversations(Protocol):
    @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
        """Update a conversation.

        Update a conversation's metadata with the given ID.

        :param conversation_id: The conversation identifier.
        :param metadata: Set of key-value pairs that can be attached to an object.
@ -208,7 +216,9 @@ class Conversations(Protocol):
    @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
        """Delete a conversation.

        Delete a conversation with the given ID.

        :param conversation_id: The conversation identifier.
        :returns: The deleted conversation resource.
@ -217,7 +227,9 @@ class Conversations(Protocol):
    @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
    async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
        """Create items.

        Create items in the conversation.

        :param conversation_id: The conversation identifier.
        :param items: Items to include in the conversation context.
@ -227,7 +239,9 @@ class Conversations(Protocol):
    @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
        """Retrieve an item.

        Retrieve a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
@ -244,7 +258,9 @@ class Conversations(Protocol):
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    ) -> ConversationItemList:
        """List items.

        List items in the conversation.

        :param conversation_id: The conversation identifier.
        :param after: An item ID to list items after, used in pagination.
@ -259,7 +275,9 @@ class Conversations(Protocol):
    async def openai_delete_conversation_item(
        self, conversation_id: str, item_id: str
    ) -> ConversationItemDeletedResource:
        """Delete an item.

        Delete a conversation item.

        :param conversation_id: The conversation identifier.
        :param item_id: The item identifier.
@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
    """Evaluations

    Llama Stack Evaluation API for running evaluations on model and agent candidates."""

    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
llama_stack/cli/stack/_list_deps.py (new file, 182 lines)
@@ -0,0 +1,182 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
import sys
from pathlib import Path

import yaml
from termcolor import cprint

from llama_stack.cli.stack.utils import ImageType
from llama_stack.core.build import get_provider_dependencies
from llama_stack.core.datatypes import (
    BuildConfig,
    BuildProvider,
    DistributionSpec,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import replace_env_vars
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api

TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"

logger = get_logger(name=__name__, category="cli")


# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.
SERVER_DEPENDENCIES = [
    "aiosqlite",
    "fastapi",
    "fire",
    "httpx",
    "uvicorn",
    "opentelemetry-sdk",
    "opentelemetry-exporter-otlp-proto-http",
]


def format_output_deps_only(
    normal_deps: list[str],
    special_deps: list[str],
    external_deps: list[str],
    uv: bool = False,
) -> str:
    """Format dependencies as a list."""
    lines = []

    uv_str = ""
    if uv:
        uv_str = "uv pip install "

    # Quote deps with commas
    quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
    lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")

    for special_dep in special_deps:
        lines.append(f"{uv_str}{quote_special_dep(special_dep)}")

    for external_dep in external_deps:
        lines.append(f"{uv_str}{quote_special_dep(external_dep)}")

    return "\n".join(lines)


def run_stack_list_deps_command(args: argparse.Namespace) -> None:
    if args.config:
        try:
            from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro

            config_file = resolve_config_or_distro(args.config, Mode.BUILD)
        except ValueError as e:
            cprint(
                f"Could not parse config file {args.config}: {e}",
                color="red",
                file=sys.stderr,
            )
            sys.exit(1)
        if config_file:
            with open(config_file) as f:
                try:
                    contents = yaml.safe_load(f)
                    contents = replace_env_vars(contents)
                    build_config = BuildConfig(**contents)
                    build_config.image_type = "venv"
                except Exception as e:
                    cprint(
                        f"Could not parse config file {config_file}: {e}",
                        color="red",
                        file=sys.stderr,
                    )
                    sys.exit(1)
    elif args.providers:
        provider_list: dict[str, list[BuildProvider]] = dict()
        for api_provider in args.providers.split(","):
            if "=" not in api_provider:
                cprint(
                    "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
            api, provider_type = api_provider.split("=")
            providers_for_api = get_provider_registry().get(Api(api), None)
            if providers_for_api is None:
                cprint(
                    f"{api} is not a valid API.",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
            if provider_type in providers_for_api:
                provider = BuildProvider(
                    provider_type=provider_type,
                    module=None,
                )
                provider_list.setdefault(api, []).append(provider)
            else:
                cprint(
                    f"{provider_type} is not a valid provider for the {api} API.",
                    color="red",
                    file=sys.stderr,
                )
                sys.exit(1)
        distribution_spec = DistributionSpec(
            providers=provider_list,
            description=",".join(args.providers),
        )
        build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)

    normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
    normal_deps += SERVER_DEPENDENCIES

    # Add external API dependencies
    if build_config.external_apis_dir:
        from llama_stack.core.external import load_external_apis

        external_apis = load_external_apis(build_config)
        if external_apis:
            for _, api_spec in external_apis.items():
                normal_deps.extend(api_spec.pip_packages)

    # Format and output based on requested format
    output = format_output_deps_only(
        normal_deps=normal_deps,
        special_deps=special_deps,
        external_deps=external_provider_dependencies,
        uv=args.format == "uv",
    )

    print(output)


def quote_if_needed(dep):
    # Add quotes if the dependency contains special characters that need escaping in shell
    # This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
    needs_quoting = any(char in dep for char in [",", "<", ">", "="])
    return f"'{dep}'" if needs_quoting else dep


def quote_special_dep(dep_string):
    """
    Quote individual packages in a special dependency string.
    Special deps may contain multiple packages and flags like --extra-index-url.
    We need to quote only the package specs that contain special characters.
    """
    parts = dep_string.split()
    quoted_parts = []

    for part in parts:
        # Don't quote flags (they start with -)
        if part.startswith("-"):
            quoted_parts.append(part)
        else:
            # Quote package specs that need it
            quoted_parts.append(quote_if_needed(part))

    return " ".join(quoted_parts)
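As a quick illustration of the quoting rule above (a standalone sketch, not part of the diff), dependencies containing shell-sensitive characters are wrapped in single quotes so the emitted `uv pip install` lines can be pasted into a shell unchanged:

```python
# Standalone re-statement of the quoting rule from _list_deps.py, for illustration only.
def quote_if_needed(dep: str) -> str:
    needs_quoting = any(ch in dep for ch in [",", "<", ">", "="])
    return f"'{dep}'" if needs_quoting else dep

print(quote_if_needed("fastapi"))       # fastapi
print(quote_if_needed("numpy>=1.26"))   # 'numpy>=1.26'
print(quote_if_needed("torch==2.3.0"))  # 'torch==2.3.0'
```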
@@ -8,6 +8,9 @@ import textwrap

 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
+from llama_stack.log import get_logger
+
+logger = get_logger(__name__, category="cli")


 class StackBuild(Subcommand):
@@ -16,7 +19,7 @@ class StackBuild(Subcommand):
         self.parser = subparsers.add_parser(
             "build",
             prog="llama stack build",
-            description="Build a Llama stack container",
+            description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         )
         self._add_arguments()
@@ -93,6 +96,9 @@ the build. If not specified, currently active environment will be used if found.
         )

     def _run_stack_build_command(self, args: argparse.Namespace) -> None:
+        logger.warning(
+            "The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
+        )
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
         from ._build import run_stack_build_command
llama_stack/cli/stack/list_deps.py (new file, 51 lines)
@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse

from llama_stack.cli.subcommand import Subcommand


class StackListDeps(Subcommand):
    def __init__(self, subparsers: argparse._SubParsersAction):
        super().__init__()
        self.parser = subparsers.add_parser(
            "list-deps",
            prog="llama stack list-deps",
            description="list the dependencies for a llama stack distribution",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        self._add_arguments()
        self.parser.set_defaults(func=self._run_stack_list_deps_command)

    def _add_arguments(self):
        self.parser.add_argument(
            "config",
            type=str,
            nargs="?",  # Make it optional
            metavar="config | distro",
            help="Path to config file to use or name of known distro (llama stack list for a list).",
        )

        self.parser.add_argument(
            "--providers",
            type=str,
            default=None,
            help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
        )
        self.parser.add_argument(
            "--format",
            type=str,
            choices=["uv", "deps-only"],
            default="deps-only",
            help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
        )

    def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
        # always keep implementation completely silo-ed away from CLI so CLI
        # can be fast to load and reduces dependencies
        from ._list_deps import run_stack_list_deps_command

        return run_stack_list_deps_command(args)
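The `--providers` flag accepts a comma-separated `api=provider` list. A minimal standalone sketch of how such a value decomposes (the provider names here are placeholders, not taken from the diff):

```python
# Standalone sketch: splitting a --providers value into {api: [provider_type, ...]}.
def parse_providers(arg: str) -> dict[str, list[str]]:
    out: dict[str, list[str]] = {}
    for pair in arg.split(","):
        api, provider_type = pair.split("=", 1)
        out.setdefault(api, []).append(provider_type)
    return out

print(parse_providers("inference=remote::ollama,safety=inline::llama-guard"))
# {'inference': ['remote::ollama'], 'safety': ['inline::llama-guard']}
```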
@@ -13,6 +13,7 @@ from llama_stack.cli.subcommand import Subcommand

 from .build import StackBuild
 from .list_apis import StackListApis
+from .list_deps import StackListDeps
 from .list_providers import StackListProviders
 from .remove import StackRemove
 from .run import StackRun
@@ -39,6 +40,7 @@ class StackParser(Subcommand):
         subparsers = self.parser.add_subparsers(title="stack_subcommands")

         # Add sub-commands
+        StackListDeps.create(subparsers)
         StackBuild.create(subparsers)
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
@@ -4,7 +4,28 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+import json
+import sys
 from enum import Enum
+from functools import lru_cache
+from pathlib import Path
+
+import yaml
+from termcolor import cprint
+
+from llama_stack.core.datatypes import (
+    BuildConfig,
+    Provider,
+    StackRunConfig,
+)
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.resolver import InvalidProviderError
+from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.image_types import LlamaStackImageType
+from llama_stack.providers.datatypes import Api
+
+TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
+

 class ImageType(Enum):
@@ -19,3 +40,91 @@ def print_subcommand_description(parser, subparsers):
         description = subcommand.description
         description_text += f"  {name:<21} {description}\n"
     parser.epilog = description_text
+
+
+def generate_run_config(
+    build_config: BuildConfig,
+    build_dir: Path,
+    image_name: str,
+) -> Path:
+    """
+    Generate a run.yaml template file for user to edit from a build.yaml file
+    """
+    apis = list(build_config.distribution_spec.providers.keys())
+    run_config = StackRunConfig(
+        container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
+        image_name=image_name,
+        apis=apis,
+        providers={},
+        external_providers_dir=build_config.external_providers_dir
+        if build_config.external_providers_dir
+        else EXTERNAL_PROVIDERS_DIR,
+    )
+    # build providers dict
+    provider_registry = get_provider_registry(build_config)
+    for api in apis:
+        run_config.providers[api] = []
+        providers = build_config.distribution_spec.providers[api]
+
+        for provider in providers:
+            pid = provider.provider_type.split("::")[-1]
+
+            p = provider_registry[Api(api)][provider.provider_type]
+            if p.deprecation_error:
+                raise InvalidProviderError(p.deprecation_error)
+
+            try:
+                config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
+            except (ModuleNotFoundError, ValueError) as exc:
+                # HACK ALERT:
+                # This code executes after building is done, the import cannot work since the
+                # package is either available in the venv or container - not available on the host.
+                # TODO: use a "is_external" flag in ProviderSpec to check if the provider is
+                # external
+                cprint(
+                    f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
+                    color="yellow",
+                    file=sys.stderr,
+                )
+                # Set config_type to None to avoid UnboundLocalError
+                config_type = None
+
+            if config_type is not None and hasattr(config_type, "sample_run_config"):
+                config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
+            else:
+                config = {}
+
+            p_spec = Provider(
+                provider_id=pid,
+                provider_type=provider.provider_type,
+                config=config,
+                module=provider.module,
+            )
+            run_config.providers[api].append(p_spec)
+
+    run_config_file = build_dir / f"{image_name}-run.yaml"
+
+    with open(run_config_file, "w") as f:
+        to_write = json.loads(run_config.model_dump_json())
+        f.write(yaml.dump(to_write, sort_keys=False))
+
+    # Only print this message for non-container builds since it will be displayed before the
+    # container is built
+    # For non-container builds, the run.yaml is generated at the very end of the build process so it
+    # makes sense to display this message
+    if build_config.image_type != LlamaStackImageType.CONTAINER.value:
+        cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
+    return run_config_file
+
+
+@lru_cache
+def available_templates_specs() -> dict[str, BuildConfig]:
+    import yaml
+
+    template_specs = {}
+    for p in TEMPLATES_PATH.rglob("*build.yaml"):
+        template_name = p.parent.name
+        with open(p) as f:
+            build_config = BuildConfig(**yaml.safe_load(f))
+            template_specs[template_name] = build_config
+    return template_specs
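A hypothetical way to exercise `generate_run_config` from the hunk above; the build.yaml path, output directory, and image name here are placeholders, not values from the diff:

```python
# Hypothetical usage sketch for generate_run_config; paths and names are placeholders.
from pathlib import Path

import yaml

from llama_stack.cli.stack.utils import generate_run_config
from llama_stack.core.datatypes import BuildConfig

with open("distributions/starter/build.yaml") as f:  # placeholder path
    build_config = BuildConfig(**yaml.safe_load(f))

run_yaml_path = generate_run_config(build_config, Path("/tmp/llama-build"), image_name="starter")
print(f"Generated run config at {run_yaml_path}")
```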
@@ -338,7 +338,7 @@ fi
 # Add other require item commands genearic to all containers
 add_to_container << EOF

-RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
+RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
 EOF

 printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import importlib
+import importlib.metadata
 import inspect
 from typing import Any
@@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
         return TEST_CONTEXT.set(provider_data["__test_id"])

     return None
+
+
+def is_debug_mode() -> bool:
+    """Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var."""
+    return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes")
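To see the new debug output in practice, the environment variable only needs to be set before the recorder runs; a minimal sketch (assuming `llama_stack` is importable):

```python
# Minimal sketch: the debug flag reads LLAMA_STACK_TEST_DEBUG at call time.
import os

os.environ["LLAMA_STACK_TEST_DEBUG"] = "1"

from llama_stack.core.testing_context import is_debug_mode

assert is_debug_mode() is True
```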
@@ -42,25 +42,25 @@ def resolve_config_or_distro(
     # Strategy 1: Try as file path first
     config_path = Path(config_or_distro)
     if config_path.exists() and config_path.is_file():
-        logger.info(f"Using file path: {config_path}")
+        logger.debug(f"Using file path: {config_path}")
         return config_path.resolve()

     # Strategy 2: Try as distribution name (if no .yaml extension)
     if not config_or_distro.endswith(".yaml"):
         distro_config = _get_distro_config_path(config_or_distro, mode)
         if distro_config.exists():
-            logger.info(f"Using distribution: {distro_config}")
+            logger.debug(f"Using distribution: {distro_config}")
             return distro_config

     # Strategy 3: Try as built distribution name
     distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
-        logger.info(f"Using built distribution: {distrib_config}")
+        logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config

     distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
     if distrib_config.exists():
-        logger.info(f"Using built distribution: {distrib_config}")
+        logger.debug(f"Using built distribution: {distrib_config}")
         return distrib_config

     # Strategy 4: Failed - provide helpful error
@@ -70,10 +70,10 @@ docker run \

 ### Via venv

-Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
+Make sure you have the Llama Stack CLI available.

 ```bash
-llama stack build --distro {{ name }} --image-type venv
+llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
 INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/{{ name }}/run.yaml \
   --port 8321
@@ -126,11 +126,11 @@ docker run \

 ### Via venv

-If you've set up your local development environment, you can also build the image using your local virtual environment.
+If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-llama stack build --distro nvidia --image-type venv
+llama stack list-deps nvidia | xargs -L1 uv pip install
 NVIDIA_API_KEY=$NVIDIA_API_KEY \
 INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
@@ -79,7 +79,6 @@ class TelemetryAdapter(Telemetry):
             metrics.set_meter_provider(metric_provider)
-
         self.meter = metrics.get_meter(__name__)

         self._lock = _global_lock

     async def initialize(self) -> None:
@@ -45,7 +45,7 @@ The following example shows how to create a chat completion for an NVIDIA NIM.

 ```python
 response = client.chat.completions.create(
-    model="meta-llama/Llama-3.1-8B-Instruct",
+    model="nvidia/meta/llama-3.1-8b-instruct",
     messages=[
         {
             "role": "system",
@@ -67,37 +67,40 @@ print(f"Response: {response.choices[0].message.content}")
 The following example shows how to do tool calling for an NVIDIA NIM.

 ```python
-from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
-
-tool_definition = ToolDefinition(
-    tool_name="get_weather",
-    description="Get current weather information for a location",
-    parameters={
-        "location": ToolParamDefinition(
-            param_type="string",
-            description="The city and state, e.g. San Francisco, CA",
-            required=True,
-        ),
-        "unit": ToolParamDefinition(
-            param_type="string",
-            description="Temperature unit (celsius or fahrenheit)",
-            required=False,
-            default="celsius",
-        ),
-    },
-)
+tool_definition = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get current weather information for a location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The city and state, e.g. San Francisco, CA",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "Temperature unit (celsius or fahrenheit)",
+                    "default": "celsius",
+                },
+            },
+            "required": ["location"],
+        },
+    },
+}

 tool_response = client.chat.completions.create(
-    model="meta-llama/Llama-3.1-8B-Instruct",
+    model="nvidia/meta/llama-3.1-8b-instruct",
     messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
     tools=[tool_definition],
 )

-print(f"Tool Response: {tool_response.choices[0].message.content}")
+print(f"Response content: {tool_response.choices[0].message.content}")
 if tool_response.choices[0].message.tool_calls:
     for tool_call in tool_response.choices[0].message.tool_calls:
-        print(f"Tool Called: {tool_call.tool_name}")
-        print(f"Arguments: {tool_call.arguments}")
+        print(f"Tool Called: {tool_call.function.name}")
+        print(f"Arguments: {tool_call.function.arguments}")
 ```

 ### Structured Output Example
@@ -105,33 +108,26 @@ if tool_response.choices[0].message.tool_calls:
 The following example shows how to do structured output for an NVIDIA NIM.

 ```python
-from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
-
 person_schema = {
     "type": "object",
     "properties": {
         "name": {"type": "string"},
-        "age": {"type": "integer"},
+        "age": {"type": "number"},
         "occupation": {"type": "string"},
     },
     "required": ["name", "age", "occupation"],
 }

-response_format = JsonSchemaResponseFormat(
-    type=ResponseFormatType.json_schema, json_schema=person_schema
-)
-
 structured_response = client.chat.completions.create(
-    model="meta-llama/Llama-3.1-8B-Instruct",
+    model="nvidia/meta/llama-3.1-8b-instruct",
     messages=[
         {
             "role": "user",
             "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. ",
         }
     ],
-    response_format=response_format,
+    extra_body={"nvext": {"guided_json": person_schema}},
 )

 print(f"Structured Response: {structured_response.choices[0].message.content}")
 ```
@@ -141,7 +137,7 @@ The following example shows how to create embeddings for an NVIDIA NIM.

 ```python
 response = client.embeddings.create(
-    model="nvidia/llama-3.2-nv-embedqa-1b-v2",
+    model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
     input=["What is the capital of France?"],
     extra_body={"input_type": "query"},
 )
@@ -163,15 +159,15 @@ image_path = {path_to_the_image}
 demo_image_b64 = load_image_as_base64(image_path)

 vlm_response = client.chat.completions.create(
-    model="nvidia/vila",
+    model="nvidia/meta/llama-3.2-11b-vision-instruct",
     messages=[
         {
             "role": "user",
             "content": [
                 {
-                    "type": "image",
-                    "image": {
-                        "data": demo_image_b64,
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{demo_image_b64}",
                     },
                 },
                 {
@@ -19,15 +19,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin):

     """
     NVIDIA Inference Adapter for Llama Stack.
-
-    Note: The inheritance order is important here. OpenAIMixin must come before
-    ModelRegistryHelper to ensure that OpenAIMixin.check_model_availability()
-    is used instead of ModelRegistryHelper.check_model_availability(). It also
-    must come before Inference to ensure that OpenAIMixin methods are available
-    in the Inference interface.
-
-    - OpenAIMixin.check_model_availability() queries the NVIDIA API to check if a model exists
-    - ModelRegistryHelper.check_model_availability() just returns False and shows a warning
     """

     # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
@@ -70,7 +70,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
             "__class__": class_name,
             "__method__": method_name,
             "__type__": span_type,
-            "__args__": str(combined_args),
+            "__args__": json.dumps(combined_args),
         }

         return class_name, method_name, span_attributes
@@ -82,8 +82,8 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

             with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    count = 0
+                count = 0
+                try:
                     async for item in method(self, *args, **kwargs):
                         yield item
                         count += 1
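The switch from `str()` to `json.dumps()` for `__args__` matters because span attributes are read by tooling that expects valid JSON rather than a Python repr; a quick standalone comparison:

```python
# Standalone comparison: str() yields a Python repr (single quotes, True/False),
# json.dumps() yields valid JSON that downstream consumers can parse.
import json

combined_args = {"model": "llama3.2:3b", "stream": True}
print(str(combined_args))         # {'model': 'llama3.2:3b', 'stream': True}
print(json.dumps(combined_args))  # {"model": "llama3.2:3b", "stream": true}
```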
@@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {}
 # Test context uses ContextVar since it changes per-test and needs async isolation
 from openai.types.completion_choice import CompletionChoice

-from llama_stack.core.testing_context import get_test_context
+from llama_stack.core.testing_context import get_test_context, is_debug_mode

 # update the "finish_reason" field, since its type definition is wrong (no None is accepted)
 CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
@@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],

     body_for_hash = _normalize_body_for_hash(body)

+    test_id = get_test_context()
     normalized: dict[str, Any] = {
         "method": method.upper(),
         "endpoint": parsed.path,
@@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],

     # Include test_id for isolation, except for shared infrastructure endpoints
     if parsed.path not in ("/api/tags", "/v1/models"):
-        normalized["test_id"] = get_test_context()
+        normalized["test_id"] = test_id

     normalized_json = json.dumps(normalized, sort_keys=True)
-    return hashlib.sha256(normalized_json.encode()).hexdigest()
+    request_hash = hashlib.sha256(normalized_json.encode()).hexdigest()
+
+    if is_debug_mode():
+        logger.info("[RECORDING DEBUG] Hash computation:")
+        logger.info(f"  Test ID: {test_id}")
+        logger.info(f"  Method: {method.upper()}")
+        logger.info(f"  Endpoint: {parsed.path}")
+        logger.info(f"  Model: {body.get('model', 'N/A')}")
+        logger.info(f"  Computed hash: {request_hash}")
+
+    return request_hash


 def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str:
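For reference, the lookup key is simply a SHA-256 over a canonical JSON of the normalized request. The standalone sketch below mirrors the fields visible in the hunk above; the values are placeholders and the real implementation normalizes the request body before hashing:

```python
# Standalone sketch of the request-hash scheme; field values are placeholders.
import hashlib
import json

normalized = {
    "method": "POST",
    "endpoint": "/v1/chat/completions",
    "body": {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "hi"}]},
    "test_id": "tests/integration/agents/test_agents.py::test_foo",
}
request_hash = hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()
print(request_hash)  # the <hash>.json filename the recorder looks for
```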
@@ -212,6 +223,11 @@ def patch_httpx_for_test_id():
                 provider_data["__test_id"] = test_id
                 request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)

+                if is_debug_mode():
+                    logger.info("[RECORDING DEBUG] Injected test ID into request header:")
+                    logger.info(f"  Test ID: {test_id}")
+                    logger.info(f"  URL: {request.url}")
+
             return None

         LlamaStackClient._prepare_request = patched_prepare_request
@@ -355,12 +371,35 @@ class ResponseStorage:
             test_file = test_id.split("::")[0]  # Remove test function part
             test_dir = Path(test_file).parent  # Get parent directory

-            # Put recordings in a "recordings" subdirectory of the test's parent dir
-            # e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
-            return test_dir / "recordings"
+            if self.base_dir.is_absolute():
+                repo_root = self.base_dir.parent.parent.parent
+                result = repo_root / test_dir / "recordings"
+                if is_debug_mode():
+                    logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):")
+                    logger.info(f"  Test ID: {test_id}")
+                    logger.info(f"  Base dir: {self.base_dir}")
+                    logger.info(f"  Repo root: {repo_root}")
+                    logger.info(f"  Test file: {test_file}")
+                    logger.info(f"  Test dir: {test_dir}")
+                    logger.info(f"  Recordings dir: {result}")
+                return result
+            else:
+                result = test_dir / "recordings"
+                if is_debug_mode():
+                    logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):")
+                    logger.info(f"  Test ID: {test_id}")
+                    logger.info(f"  Base dir: {self.base_dir}")
+                    logger.info(f"  Test dir: {test_dir}")
+                    logger.info(f"  Recordings dir: {result}")
+                return result
         else:
             # Fallback for non-test contexts
-            return self.base_dir / "recordings"
+            result = self.base_dir / "recordings"
+            if is_debug_mode():
+                logger.info("[RECORDING DEBUG] Path resolution (no test context):")
+                logger.info(f"  Base dir: {self.base_dir}")
+                logger.info(f"  Recordings dir: {result}")
+            return result

     def _ensure_directory(self):
         """Ensure test-specific directories exist."""
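The net effect of the new path logic is that recordings live next to the test module rather than only under the shared base directory. A standalone sketch of the test-id to directory mapping (the paths are illustrative):

```python
# Standalone sketch of the test-id -> recordings-dir mapping used above; paths are illustrative.
from pathlib import Path

test_id = "tests/integration/agents/test_agents.py::test_custom_tool"
test_dir = Path(test_id.split("::")[0]).parent  # tests/integration/agents
recordings_dir = test_dir / "recordings"        # tests/integration/agents/recordings
print(recordings_dir)
```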
@@ -395,6 +434,13 @@ class ResponseStorage:

         response_path = responses_dir / response_file

+        if is_debug_mode():
+            logger.info("[RECORDING DEBUG] Storing recording:")
+            logger.info(f"  Request hash: {request_hash}")
+            logger.info(f"  File: {response_path}")
+            logger.info(f"  Test ID: {get_test_context()}")
+            logger.info(f"  Endpoint: {endpoint}")
+
         # Save response to JSON file with metadata
         with open(response_path, "w") as f:
             json.dump(
@@ -423,16 +469,33 @@ class ResponseStorage:
         test_dir = self._get_test_dir()
         response_path = test_dir / response_file

+        if is_debug_mode():
+            logger.info("[RECORDING DEBUG] Looking up recording:")
+            logger.info(f"  Request hash: {request_hash}")
+            logger.info(f"  Primary path: {response_path}")
+            logger.info(f"  Primary exists: {response_path.exists()}")
+
         if response_path.exists():
+            if is_debug_mode():
+                logger.info("  Found in primary location")
             return _recording_from_file(response_path)

         # Fallback to base recordings directory (for session-level recordings)
         fallback_dir = self.base_dir / "recordings"
         fallback_path = fallback_dir / response_file

+        if is_debug_mode():
+            logger.info(f"  Fallback path: {fallback_path}")
+            logger.info(f"  Fallback exists: {fallback_path.exists()}")
+
         if fallback_path.exists():
+            if is_debug_mode():
+                logger.info("  Found in fallback location")
             return _recording_from_file(fallback_path)

+        if is_debug_mode():
+            logger.info("  Recording not found in either location")
+
         return None

     def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
@@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
     mode = _current_mode
     storage = _current_storage

+    if is_debug_mode():
+        logger.info("[RECORDING DEBUG] Entering inference method:")
+        logger.info(f"  Mode: {mode}")
+        logger.info(f"  Client type: {client_type}")
+        logger.info(f"  Endpoint: {endpoint}")
+        logger.info(f"  Test context: {get_test_context()}")
+
     if mode == APIRecordingMode.LIVE or storage is None:
         if endpoint == "/v1/models":
             return original_method(self, *args, **kwargs)
@@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
             return response_body
         elif mode == APIRecordingMode.REPLAY:
             # REPLAY mode requires recording to exist
+            if is_debug_mode():
+                logger.error("[RECORDING DEBUG] Recording not found!")
+                logger.error(f"  Mode: {mode}")
+                logger.error(f"  Request hash: {request_hash}")
+                logger.error(f"  Method: {method}")
+                logger.error(f"  URL: {url}")
+                logger.error(f"  Endpoint: {endpoint}")
+                logger.error(f"  Model: {body.get('model', 'unknown')}")
+                logger.error(f"  Test context: {get_test_context()}")
+                logger.error(
+                    f"  Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}"
+                )
             raise RuntimeError(
                 f"Recording not found for request hash: {request_hash}\n"
                 f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n"
llama_stack/ui/package-lock.json (generated, 2647 lines changed) — diff suppressed because it is too large.
@@ -43,16 +43,16 @@
     "@testing-library/dom": "^10.4.1",
     "@testing-library/jest-dom": "^6.8.0",
     "@testing-library/react": "^16.3.0",
-    "@types/jest": "^29.5.14",
+    "@types/jest": "^30.0.0",
     "@types/node": "^24",
     "@types/react": "^19",
     "@types/react-dom": "^19",
     "eslint": "^9",
-    "eslint-config-next": "15.5.2",
+    "eslint-config-next": "15.5.6",
     "eslint-config-prettier": "^10.1.8",
     "eslint-plugin-prettier": "^5.5.4",
-    "jest": "^29.7.0",
+    "jest": "^30.2.0",
-    "jest-environment-jsdom": "^30.1.2",
+    "jest-environment-jsdom": "^30.2.0",
     "prettier": "3.6.2",
     "tailwindcss": "^4",
     "ts-node": "^10.9.2",
scripts/diagnose_recordings.py (new executable file, 370 lines)
@@ -0,0 +1,370 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""
Diagnostic tool for debugging test recording issues.

Usage:
    # Find where a hash would be looked up
    ./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7

    # Show what's in a recording file
    ./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json

    # List all recordings for a test
    ./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool"

    # Explain lookup paths for a test
    ./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo"

    # Compare request hash computation
    ./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..."
"""

import argparse
import json
import sys
from pathlib import Path

# Add parent directory to path to import from llama_stack
REPO_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(REPO_ROOT))

try:
    from llama_stack.testing.api_recorder import normalize_inference_request
except ImportError:
    normalize_inference_request = None


def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None):
    """Find where a hash would be looked up and what exists"""
    if base_dir is None:
        base_dir = REPO_ROOT / "tests/integration/common"

    print(f"Searching for hash: {hash_value}\n")
    print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})")

    # Compute test directory
    if test_id:
        test_file = test_id.split("::")[0]
        test_dir = Path(test_file).parent

        if base_dir.is_absolute():
            repo_root = base_dir.parent.parent.parent
            test_recordings_dir = repo_root / test_dir / "recordings"
        else:
            test_recordings_dir = test_dir / "recordings"
        print(f"Test ID: {test_id}")
        print(f"Test dir: {test_recordings_dir}\n")
    else:
        test_recordings_dir = base_dir / "recordings"
        print("No test ID provided, using base dir\n")

    # Check primary location
    response_file = f"{hash_value}.json"
    response_path = test_recordings_dir / response_file

    print("Checking primary location:")
    print(f"  {response_path}")
    if response_path.exists():
        print("  EXISTS")
        print("\nFound! Contents:")
        show_recording(response_path)
        return True
    else:
        print("  Does not exist")

    # Check fallback location
    fallback_dir = base_dir / "recordings"
    fallback_path = fallback_dir / response_file

    print("\nChecking fallback location:")
    print(f"  {fallback_path}")
    if fallback_path.exists():
        print("  EXISTS")
        print("\nFound in fallback! Contents:")
        show_recording(fallback_path)
        return True
    else:
        print("  Does not exist")

    # Show what files DO exist
    print(f"\nFiles in test directory ({test_recordings_dir}):")
    if test_recordings_dir.exists():
        json_files = list(test_recordings_dir.glob("*.json"))
        if json_files:
            for f in json_files[:20]:
                print(f"  - {f.name}")
            if len(json_files) > 20:
                print(f"  ... and {len(json_files) - 20} more")
        else:
            print("  (empty)")
    else:
        print("  Directory does not exist")

    print(f"\nFiles in fallback directory ({fallback_dir}):")
    if fallback_dir.exists():
        json_files = list(fallback_dir.glob("*.json"))
        if json_files:
            for f in json_files[:20]:
                print(f"  - {f.name}")
            if len(json_files) > 20:
                print(f"  ... and {len(json_files) - 20} more")
        else:
            print("  (empty)")
    else:
        print("  Directory does not exist")

    # Try partial hash match
    print("\nLooking for partial matches (first 16 chars)...")
    partial = hash_value[:16]
    matches = []

    for dir_to_search in [test_recordings_dir, fallback_dir]:
        if dir_to_search.exists():
            for f in dir_to_search.glob("*.json"):
                if f.stem.startswith(partial):
                    matches.append(f)

    if matches:
        print(f"Found {len(matches)} partial match(es):")
        for m in matches:
            print(f"  {m}")
    else:
        print("No partial matches found")

    return False


def show_recording(file_path: Path):
    """Show contents of a recording file"""
    if not file_path.exists():
        print(f"File does not exist: {file_path}")
        return

    with open(file_path) as f:
        data = json.load(f)

    print(f"\nRecording: {file_path.name}\n")
    print(f"Test ID: {data.get('test_id', 'N/A')}")
    print("\nRequest:")
    req = data.get("request", {})
    print(f"  Method: {req.get('method', 'N/A')}")
    print(f"  URL: {req.get('url', 'N/A')}")
    print(f"  Endpoint: {req.get('endpoint', 'N/A')}")
    print(f"  Model: {req.get('model', 'N/A')}")

    body = req.get("body", {})
    if body:
        print("\nRequest Body:")
        print(f"  Model: {body.get('model', 'N/A')}")
        print(f"  Stream: {body.get('stream', 'N/A')}")
        if "messages" in body:
            print(f"  Messages: {len(body['messages'])} message(s)")
            for i, msg in enumerate(body["messages"][:3]):
                role = msg.get("role", "unknown")
                content = msg.get("content", "")
                if isinstance(content, str):
                    preview = content[:80] + "..." if len(content) > 80 else content
                else:
                    preview = f"[{type(content).__name__}]"
                print(f"    [{i}] {role}: {preview}")
        if "tools" in body:
            print(f"  Tools: {len(body['tools'])} tool(s)")

    response = data.get("response", {})
    if response:
        print("\nResponse:")
        print(f"  Is streaming: {response.get('is_streaming', False)}")
        response_body = response.get("body", {})
        if isinstance(response_body, dict):
            if "__type__" in response_body:
                print(f"  Type: {response_body['__type__']}")
            if "__data__" in response_body:
                response_data = response_body["__data__"]
                if "choices" in response_data:
                    print(f"  Choices: {len(response_data['choices'])}")
                if "usage" in response_data:
                    usage = response_data["usage"]
                    print(f"  Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}")


def list_test_recordings(test_id: str, base_dir: Path | None = None):
    """List all recordings for a specific test"""
    if base_dir is None:
        base_dir = REPO_ROOT / "tests/integration/common"

    test_file = test_id.split("::")[0]
    test_dir = Path(test_file).parent

    if base_dir.is_absolute():
        repo_root = base_dir.parent.parent.parent
        test_recordings_dir = repo_root / test_dir / "recordings"
    else:
        test_recordings_dir = test_dir / "recordings"

    print(f"Recordings for test: {test_id}\n")
    print(f"Directory: {test_recordings_dir}\n")

    if not test_recordings_dir.exists():
        print("Directory does not exist")
        return

    # Find all recordings for this specific test
    recordings = []
    for f in test_recordings_dir.glob("*.json"):
        try:
            with open(f) as fp:
                data = json.load(fp)
                if data.get("test_id") == test_id:
                    recordings.append((f, data))
        except Exception as e:
            print(f"Could not read {f.name}: {e}")

    if not recordings:
        print("No recordings found for this exact test ID")
        print("\nAll files in directory:")
        for f in test_recordings_dir.glob("*.json"):
            print(f"  - {f.name}")
        return

    print(f"Found {len(recordings)} recording(s):\n")
    for f, data in recordings:
        req = data.get("request", {})
        print(f"  {f.name}")
        print(f"    Endpoint: {req.get('endpoint', 'N/A')}")
        print(f"    Model: {req.get('model', 'N/A')}")
        print("")


def explain_paths(test_id: str | None = None, base_dir: Path | None = None):
    """Explain where recordings would be searched"""
    if base_dir is None:
        base_dir = REPO_ROOT / "tests/integration/common"

    print("Recording Lookup Path Explanation\n")
    print(f"Base directory: {base_dir}")
    print(f"  Absolute: {base_dir.is_absolute()}")
    print("")

    if test_id:
        print(f"Test ID: {test_id}")
        test_file = test_id.split("::")[0]
        print(f"  Test file: {test_file}")

        test_dir = Path(test_file).parent
        print(f"  Test dir (relative): {test_dir}")

        if base_dir.is_absolute():
            repo_root = base_dir.parent.parent.parent
            print(f"  Repo root: {repo_root}")
            test_recordings_dir = repo_root / test_dir / "recordings"
            print(f"  Test recordings dir (absolute): {test_recordings_dir}")
        else:
            test_recordings_dir = test_dir / "recordings"
            print(f"  Test recordings dir (relative): {test_recordings_dir}")

        print("\nLookup order for recordings:")
        print(f"  1. Test-specific: {test_recordings_dir}/<hash>.json")
        print(f"  2. Fallback: {base_dir}/recordings/<hash>.json")

    else:
        print("No test ID provided")
        print("\nLookup location:")
        print(f"  {base_dir}/recordings/<hash>.json")


def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None):
    """Compute hash for a request"""
    if normalize_inference_request is None:
        print("Could not import normalize_inference_request from llama_stack.testing.api_recorder")
        print("Make sure you're running from the repo root with proper PYTHONPATH")
        return

    try:
        body = json.loads(body_json)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON in body: {e}")
        return

    # Create a fake URL with the endpoint
    url = f"http://example.com{endpoint}"

    # Set test context if provided
    if test_id:
        from llama_stack.core.testing_context import set_test_context

        set_test_context(test_id)

    hash_result = normalize_inference_request(method, url, {}, body)

    print("Hash Computation\n")
    print(f"Method: {method}")
    print(f"Endpoint: {endpoint}")
    print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}")
    print("\nBody:")
    print(json.dumps(body, indent=2))
    print(f"\nComputed Hash: {hash_result}")
    print(f"\nLooking for file: {hash_result}.json")


def main():
    parser = argparse.ArgumentParser(
        description="Diagnostic tool for test recording issues",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # find-hash command
    find_parser = subparsers.add_parser("find-hash", help="Find where a hash would be looked up")
    find_parser.add_argument("hash", help="Hash value to search for (full or partial)")
    find_parser.add_argument("--test-id", help="Test ID to determine search paths")
    find_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # show command
    show_parser = subparsers.add_parser("show", help="Show contents of a recording file")
    show_parser.add_argument("file", type=Path, help="Path to recording JSON file")

    # list-test command
    list_parser = subparsers.add_parser("list-test", help="List all recordings for a test")
    list_parser.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)")
    list_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # explain-paths command
    explain_parser = subparsers.add_parser("explain-paths", help="Explain where recordings are searched")
    explain_parser.add_argument("--test-id", help="Test ID to show paths for")
    explain_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")

    # compute-hash command
    hash_parser = subparsers.add_parser("compute-hash", help="Compute hash for a request")
    hash_parser.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)")
    hash_parser.add_argument("--method", default="POST", help="HTTP method (default: POST)")
    hash_parser.add_argument("--body", required=True, help="Request body as JSON string")
    hash_parser.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    if args.command == "find-hash":
        find_hash(args.hash, args.base_dir, args.test_id)
    elif args.command == "show":
        show_recording(args.file)
    elif args.command == "list-test":
        list_test_recordings(args.test_id, args.base_dir)
    elif args.command == "explain-paths":
        explain_paths(args.test_id, args.base_dir)
    elif args.command == "compute-hash":
        compute_hash(args.endpoint, args.method, args.body, args.test_id)


if __name__ == "__main__":
    main()
358
scripts/docker.sh
Executable file
|
|
@ -0,0 +1,358 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Docker container management script for Llama Stack
|
||||||
|
# Allows starting/stopping/restarting a Llama Stack docker container for testing
|
||||||
|
|
||||||
|
# Default values
|
||||||
|
DISTRO=""
|
||||||
|
PORT=8321
|
||||||
|
INFERENCE_MODE="replay"
|
||||||
|
COMMAND=""
|
||||||
|
USE_COPY_NOT_MOUNT=false
|
||||||
|
NO_REBUILD=false
|
||||||
|
|
||||||
|
# Function to display usage
|
||||||
|
usage() {
|
||||||
|
cat <<EOF
|
||||||
|
Usage: $0 COMMAND [OPTIONS]
|
||||||
|
|
||||||
|
Commands:
|
||||||
|
start Build and start the docker container
|
||||||
|
stop Stop and remove the docker container
|
||||||
|
restart Restart the docker container
|
||||||
|
status Check if the container is running
|
||||||
|
logs Show container logs (add -f to follow)
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--distro STRING Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
|
||||||
|
--port NUMBER Port to run on (default: 8321)
|
||||||
|
--inference-mode STRING Inference mode: replay, record-if-missing or record (default: replay)
|
||||||
|
--copy-source Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
|
||||||
|
--no-rebuild Skip building the image, just start the container (default: false)
|
||||||
|
--help Show this help message
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
# Start a docker container (local dev mode - mounts source, builds image)
|
||||||
|
$0 start --distro ci-tests
|
||||||
|
|
||||||
|
# Start without rebuilding (uses existing image)
|
||||||
|
$0 start --distro ci-tests --no-rebuild
|
||||||
|
|
||||||
|
# Start with source copied into image (like CI)
|
||||||
|
$0 start --distro ci-tests --copy-source
|
||||||
|
|
||||||
|
# Start with custom port
|
||||||
|
$0 start --distro starter --port 8080
|
||||||
|
|
||||||
|
# Check status
|
||||||
|
$0 status --distro ci-tests
|
||||||
|
|
||||||
|
# View logs
|
||||||
|
$0 logs --distro ci-tests
|
||||||
|
|
||||||
|
# Stop container
|
||||||
|
$0 stop --distro ci-tests
|
||||||
|
|
||||||
|
# Restart container
|
||||||
|
$0 restart --distro ci-tests
|
||||||
|
|
||||||
|
Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
|
||||||
|
automatically copied into the image. Locally, source is mounted for live development
|
||||||
|
unless --copy-source is specified.
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse command (first positional arg)
|
||||||
|
if [[ $# -eq 0 ]]; then
|
||||||
|
echo "Error: Command required"
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
COMMAND="$1"
|
||||||
|
shift
|
||||||
|
|
||||||
|
# Validate command
|
||||||
|
case "$COMMAND" in
|
||||||
|
start | stop | restart | status | logs) ;;
|
||||||
|
--help)
|
||||||
|
usage
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Error: Unknown command: $COMMAND"
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Parse options
|
||||||
|
while [[ $# -gt 0 ]]; do
|
||||||
|
case $1 in
|
||||||
|
--distro)
|
||||||
|
DISTRO="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--port)
|
||||||
|
PORT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--inference-mode)
|
||||||
|
INFERENCE_MODE="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--copy-source)
|
||||||
|
USE_COPY_NOT_MOUNT=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--no-rebuild)
|
||||||
|
NO_REBUILD=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--help)
|
||||||
|
usage
|
||||||
|
exit 0
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Unknown option: $1"
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# Validate required parameters for commands that need them
|
||||||
|
if [[ "$COMMAND" != "stop" && "$COMMAND" != "status" && "$COMMAND" != "logs" ]]; then
|
||||||
|
if [[ -z "$DISTRO" ]]; then
|
||||||
|
echo "Error: --distro is required for '$COMMAND' command"
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If distro not provided for stop/status/logs, try to infer from running containers
|
||||||
|
if [[ -z "$DISTRO" && ("$COMMAND" == "stop" || "$COMMAND" == "status" || "$COMMAND" == "logs") ]]; then
|
||||||
|
# Look for any llama-stack-test-* container
|
||||||
|
RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
|
||||||
|
if [[ -n "$RUNNING_CONTAINERS" ]]; then
|
||||||
|
DISTRO=$(echo "$RUNNING_CONTAINERS" | sed 's/llama-stack-test-//')
|
||||||
|
echo "Found running container for distro: $DISTRO"
|
||||||
|
else
|
||||||
|
echo "Error: --distro is required (no running containers found)"
|
||||||
|
usage
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Remove docker: prefix if present
|
||||||
|
DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
|
||||||
|
|
||||||
|
CONTAINER_NAME="llama-stack-test-$DISTRO"
|
||||||
|
|
||||||
|
# Function to check if container is running
|
||||||
|
is_container_running() {
|
||||||
|
docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to check if container exists (running or stopped)
|
||||||
|
container_exists() {
|
||||||
|
docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to stop and remove container
|
||||||
|
stop_container() {
|
||||||
|
if container_exists; then
|
||||||
|
echo "Stopping container: $CONTAINER_NAME"
|
||||||
|
docker stop "$CONTAINER_NAME" 2>/dev/null || true
|
||||||
|
echo "Removing container: $CONTAINER_NAME"
|
||||||
|
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||||
|
echo "✅ Container stopped and removed"
|
||||||
|
else
|
||||||
|
echo "⚠️ Container $CONTAINER_NAME does not exist"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to build docker image
|
||||||
|
build_image() {
|
||||||
|
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||||
|
# Get the repo root (parent of scripts directory)
|
||||||
|
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||||
|
|
||||||
|
# Determine whether to copy or mount source
|
||||||
|
# Copy in CI or if explicitly requested, otherwise mount for live development
|
||||||
|
BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
|
||||||
|
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||||
|
echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
|
||||||
|
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||||
|
else
|
||||||
|
echo "Will mount source for live development"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
|
||||||
|
echo "❌ Failed to build Docker image"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "✅ Docker image built successfully"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to start container
|
||||||
|
start_container() {
|
||||||
|
# Check if already running
|
||||||
|
if is_container_running; then
|
||||||
|
echo "⚠️ Container $CONTAINER_NAME is already running"
|
||||||
|
echo "URL: http://localhost:$PORT"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Stop and remove if exists but not running
|
||||||
|
if container_exists; then
|
||||||
|
echo "Removing existing stopped container..."
|
||||||
|
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Build the image unless --no-rebuild was specified
|
||||||
|
if [[ "$NO_REBUILD" == "true" ]]; then
|
||||||
|
echo "Skipping build (--no-rebuild specified)"
|
||||||
|
# Check if image exists (with or without localhost/ prefix)
|
||||||
|
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
|
||||||
|
echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
|
||||||
|
echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "✅ Found existing image for distribution-$DISTRO:dev"
|
||||||
|
else
|
||||||
|
build_image
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Starting Docker Container ==="
|
||||||
|
|
||||||
|
# Get the repo root for volume mount
|
||||||
|
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||||
|
|
||||||
|
# Determine the actual image name (may have localhost/ prefix)
|
||||||
|
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||||
|
if [[ -z "$IMAGE_NAME" ]]; then
|
||||||
|
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Using image: $IMAGE_NAME"
|
||||||
|
|
||||||
|
# Build environment variables for docker run
|
||||||
|
DOCKER_ENV_VARS=""
|
||||||
|
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||||
|
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||||
|
|
||||||
|
# Set default OLLAMA_URL if not provided
|
||||||
|
# On macOS/Windows, use host.docker.internal to reach host from container
|
||||||
|
# On Linux with --network host, use localhost
|
||||||
|
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
|
||||||
|
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
|
||||||
|
else
|
||||||
|
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
|
||||||
|
fi
|
||||||
|
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||||
|
|
||||||
|
# Pass through API keys if they exist
|
||||||
|
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||||
|
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||||
|
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||||
|
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||||
|
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||||
|
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||||
|
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||||
|
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||||
|
[ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR"
|
||||||
|
|
||||||
|
# Use --network host on Linux only (macOS doesn't support it properly)
|
||||||
|
NETWORK_MODE=""
|
||||||
|
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
|
||||||
|
NETWORK_MODE="--network host"
|
||||||
|
fi
|
||||||
|
|
||||||
|
docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
|
||||||
|
-p $PORT:$PORT \
|
||||||
|
$DOCKER_ENV_VARS \
|
||||||
|
-v "$REPO_ROOT":/app/llama-stack-source \
|
||||||
|
"$IMAGE_NAME" \
|
||||||
|
--port $PORT
|
||||||
|
|
||||||
|
echo "Waiting for container to start..."
|
||||||
|
for i in {1..30}; do
|
||||||
|
if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||||
|
echo "✅ Container started successfully"
|
||||||
|
echo ""
|
||||||
|
echo "=== Container Information ==="
|
||||||
|
echo "Container name: $CONTAINER_NAME"
|
||||||
|
echo "URL: http://localhost:$PORT"
|
||||||
|
echo "Health check: http://localhost:$PORT/v1/health"
|
||||||
|
echo ""
|
||||||
|
echo "To view logs: $0 logs --distro $DISTRO"
|
||||||
|
echo "To stop: $0 stop --distro $DISTRO"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [[ $i -eq 30 ]]; then
|
||||||
|
echo "❌ Container failed to start within timeout"
|
||||||
|
echo "Showing container logs:"
|
||||||
|
docker logs "$CONTAINER_NAME"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Execute command
|
||||||
|
case "$COMMAND" in
|
||||||
|
start)
|
||||||
|
start_container
|
||||||
|
;;
|
||||||
|
stop)
|
||||||
|
stop_container
|
||||||
|
;;
|
||||||
|
restart)
|
||||||
|
echo "Restarting container: $CONTAINER_NAME"
|
||||||
|
stop_container
|
||||||
|
echo ""
|
||||||
|
start_container
|
||||||
|
;;
|
||||||
|
status)
|
||||||
|
if is_container_running; then
|
||||||
|
echo "✅ Container $CONTAINER_NAME is running"
|
||||||
|
echo "URL: http://localhost:$PORT"
|
||||||
|
# Try to get the actual port from the container
|
||||||
|
ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1)
|
||||||
|
if [[ -n "$ACTUAL_PORT" ]]; then
|
||||||
|
echo "Port: $ACTUAL_PORT"
|
||||||
|
fi
|
||||||
|
elif container_exists; then
|
||||||
|
echo "⚠️ Container $CONTAINER_NAME exists but is not running"
|
||||||
|
echo "Start it with: $0 start --distro $DISTRO"
|
||||||
|
else
|
||||||
|
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||||
|
echo "Start it with: $0 start --distro $DISTRO"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
logs)
|
||||||
|
if container_exists; then
|
||||||
|
echo "=== Logs for $CONTAINER_NAME ==="
|
||||||
|
# Check if -f flag was passed after 'logs' command
|
||||||
|
if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
|
||||||
|
docker logs --tail 100 --follow "$CONTAINER_NAME"
|
||||||
|
else
|
||||||
|
docker logs --tail 100 "$CONTAINER_NAME"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
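The start_container wait loop above shells out to curl once per second; an equivalent polling helper in Python, kept here as a hedged sketch (port and timeout mirror the script's defaults), looks like this:

import time
import urllib.error
import urllib.request

def wait_for_stack(port: int = 8321, timeout: int = 30) -> bool:
    # Poll /v1/health once per second until it answers OK or the timeout expires.
    deadline = time.time() + timeout
    url = f"http://localhost:{port}/v1/health"
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=2) as resp:
                if b"OK" in resp.read():
                    return True
        except (urllib.error.URLError, OSError):
            pass  # server not accepting connections yet
        time.sleep(1)
    return False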
|
|
@ -5,10 +5,10 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
[ -z "$BASH_VERSION" ] && {
|
[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
|
||||||
echo "This script must be run with bash" >&2
|
if set -o | grep -Eq 'posix[[:space:]]+on'; then
|
||||||
exit 1
|
exec /usr/bin/env bash "$0" "$@"
|
||||||
}
|
fi
|
||||||
|
|
||||||
set -Eeuo pipefail
|
set -Eeuo pipefail
|
||||||
|
|
||||||
|
|
@ -18,12 +18,110 @@ MODEL_ALIAS="llama3.2:3b"
|
||||||
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
||||||
WAIT_TIMEOUT=30
|
WAIT_TIMEOUT=30
|
||||||
TEMP_LOG=""
|
TEMP_LOG=""
|
||||||
|
WITH_TELEMETRY=true
|
||||||
|
TELEMETRY_SERVICE_NAME="llama-stack"
|
||||||
|
TELEMETRY_SINKS="otel_trace,otel_metric"
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT="http://otel-collector:4318"
|
||||||
|
TEMP_TELEMETRY_DIR=""
|
||||||
|
|
||||||
|
materialize_telemetry_configs() {
|
||||||
|
local dest="$1"
|
||||||
|
mkdir -p "$dest"
|
||||||
|
local otel_cfg="${dest}/otel-collector-config.yaml"
|
||||||
|
local prom_cfg="${dest}/prometheus.yml"
|
||||||
|
local graf_cfg="${dest}/grafana-datasources.yaml"
|
||||||
|
|
||||||
|
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
|
||||||
|
if [ -e "$asset" ]; then
|
||||||
|
die "Telemetry asset ${asset} already exists; refusing to overwrite"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
cat <<'EOF' > "$otel_cfg"
|
||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: 0.0.0.0:4317
|
||||||
|
http:
|
||||||
|
endpoint: 0.0.0.0:4318
|
||||||
|
|
||||||
|
processors:
|
||||||
|
batch:
|
||||||
|
timeout: 1s
|
||||||
|
send_batch_size: 1024
|
||||||
|
|
||||||
|
exporters:
|
||||||
|
# Export traces to Jaeger
|
||||||
|
otlp/jaeger:
|
||||||
|
endpoint: jaeger:4317
|
||||||
|
tls:
|
||||||
|
insecure: true
|
||||||
|
|
||||||
|
# Export metrics to Prometheus
|
||||||
|
prometheus:
|
||||||
|
endpoint: 0.0.0.0:9464
|
||||||
|
namespace: llama_stack
|
||||||
|
|
||||||
|
# Debug exporter for troubleshooting
|
||||||
|
debug:
|
||||||
|
verbosity: detailed
|
||||||
|
|
||||||
|
service:
|
||||||
|
pipelines:
|
||||||
|
traces:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [otlp/jaeger, debug]
|
||||||
|
|
||||||
|
metrics:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch]
|
||||||
|
exporters: [prometheus, debug]
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<'EOF' > "$prom_cfg"
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090']
|
||||||
|
|
||||||
|
- job_name: 'otel-collector'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['otel-collector:9464']
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<'EOF' > "$graf_cfg"
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
datasources:
|
||||||
|
- name: Prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
url: http://prometheus:9090
|
||||||
|
isDefault: true
|
||||||
|
editable: true
|
||||||
|
|
||||||
|
- name: Jaeger
|
||||||
|
type: jaeger
|
||||||
|
access: proxy
|
||||||
|
url: http://jaeger:16686
|
||||||
|
editable: true
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
# Cleanup function to remove temporary files
|
# Cleanup function to remove temporary files
|
||||||
cleanup() {
|
cleanup() {
|
||||||
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
||||||
rm -f "$TEMP_LOG"
|
rm -f "$TEMP_LOG"
|
||||||
fi
|
fi
|
||||||
|
if [ -n "$TEMP_TELEMETRY_DIR" ] && [ -d "$TEMP_TELEMETRY_DIR" ]; then
|
||||||
|
rm -rf "$TEMP_TELEMETRY_DIR"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Set up trap to clean up on exit, error, or interrupt
|
# Set up trap to clean up on exit, error, or interrupt
|
||||||
|
|
@ -32,7 +130,7 @@ trap cleanup EXIT ERR INT TERM
|
||||||
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
|
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
|
||||||
die(){
|
die(){
|
||||||
printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2
|
printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2
|
||||||
printf "\e[1;31m🐛 Report an issue @ https://github.com/meta-llama/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
printf "\e[1;31m🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,6 +187,12 @@ Options:
|
||||||
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
||||||
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
||||||
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
||||||
|
--with-telemetry Provision Jaeger, OTEL Collector, Prometheus, and Grafana (default: enabled)
|
||||||
|
--no-telemetry, --without-telemetry
|
||||||
|
Skip provisioning the telemetry stack
|
||||||
|
--telemetry-service NAME Service name reported to telemetry (default: ${TELEMETRY_SERVICE_NAME})
|
||||||
|
--telemetry-sinks SINKS Comma-separated telemetry sinks (default: ${TELEMETRY_SINKS})
|
||||||
|
--otel-endpoint URL OTLP endpoint provided to Llama Stack (default: ${OTEL_EXPORTER_OTLP_ENDPOINT})
|
||||||
-h, --help Show this help message
|
-h, --help Show this help message
|
||||||
|
|
||||||
For more information:
|
For more information:
|
||||||
|
|
@ -127,6 +231,26 @@ while [[ $# -gt 0 ]]; do
|
||||||
WAIT_TIMEOUT="$2"
|
WAIT_TIMEOUT="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--with-telemetry)
|
||||||
|
WITH_TELEMETRY=true
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--no-telemetry|--without-telemetry)
|
||||||
|
WITH_TELEMETRY=false
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
--telemetry-service)
|
||||||
|
TELEMETRY_SERVICE_NAME="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--telemetry-sinks)
|
||||||
|
TELEMETRY_SINKS="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--otel-endpoint)
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
die "Unknown option: $1"
|
die "Unknown option: $1"
|
||||||
;;
|
;;
|
||||||
|
|
@ -171,7 +295,11 @@ if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Clean up any leftovers from earlier runs
|
# Clean up any leftovers from earlier runs
|
||||||
for name in ollama-server llama-stack; do
|
containers=(ollama-server llama-stack)
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
containers+=(jaeger otel-collector prometheus grafana)
|
||||||
|
fi
|
||||||
|
for name in "${containers[@]}"; do
|
||||||
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
||||||
if [ -n "$ids" ]; then
|
if [ -n "$ids" ]; then
|
||||||
log "⚠️ Found existing container(s) for '${name}', removing..."
|
log "⚠️ Found existing container(s) for '${name}', removing..."
|
||||||
|
|
@ -191,6 +319,64 @@ if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Telemetry Stack
|
||||||
|
###############################################################################
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
TEMP_TELEMETRY_DIR="$(mktemp -d)"
|
||||||
|
TELEMETRY_ASSETS_DIR="$TEMP_TELEMETRY_DIR"
|
||||||
|
log "🧰 Materializing telemetry configs..."
|
||||||
|
materialize_telemetry_configs "$TELEMETRY_ASSETS_DIR"
|
||||||
|
|
||||||
|
log "📡 Starting telemetry stack..."
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name jaeger \
|
||||||
|
--network llama-net \
|
||||||
|
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 14250:14250 \
|
||||||
|
-p 9411:9411 \
|
||||||
|
docker.io/jaegertracing/all-in-one:latest > /dev/null 2>&1; then
|
||||||
|
die "Jaeger startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name otel-collector \
|
||||||
|
--network llama-net \
|
||||||
|
-p 4318:4318 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 9464:9464 \
|
||||||
|
-p 13133:13133 \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
|
||||||
|
docker.io/otel/opentelemetry-collector-contrib:latest \
|
||||||
|
--config /etc/otel-collector-config.yaml > /dev/null 2>&1; then
|
||||||
|
die "OpenTelemetry Collector startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name prometheus \
|
||||||
|
--network llama-net \
|
||||||
|
-p 9090:9090 \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
|
||||||
|
docker.io/prom/prometheus:latest \
|
||||||
|
--config.file=/etc/prometheus/prometheus.yml \
|
||||||
|
--storage.tsdb.path=/prometheus \
|
||||||
|
--web.console.libraries=/etc/prometheus/console_libraries \
|
||||||
|
--web.console.templates=/etc/prometheus/consoles \
|
||||||
|
--storage.tsdb.retention.time=200h \
|
||||||
|
--web.enable-lifecycle > /dev/null 2>&1; then
|
||||||
|
die "Prometheus startup failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name grafana \
|
||||||
|
--network llama-net \
|
||||||
|
-p 3000:3000 \
|
||||||
|
-e GF_SECURITY_ADMIN_PASSWORD=admin \
|
||||||
|
-e GF_USERS_ALLOW_SIGN_UP=false \
|
||||||
|
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
|
||||||
|
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
|
||||||
|
die "Grafana startup failed"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# 1. Ollama
|
# 1. Ollama
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
@ -218,9 +404,19 @@ fi
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# 2. Llama‑Stack
|
# 2. Llama‑Stack
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
server_env_opts=()
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
server_env_opts+=(
|
||||||
|
-e TELEMETRY_SINKS="${TELEMETRY_SINKS}"
|
||||||
|
-e OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||||
|
-e OTEL_SERVICE_NAME="${TELEMETRY_SERVICE_NAME}"
|
||||||
|
)
|
||||||
|
fi
|
||||||
|
|
||||||
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
||||||
--network llama-net \
|
--network llama-net \
|
||||||
-p "${PORT}:${PORT}" \
|
-p "${PORT}:${PORT}" \
|
||||||
|
"${server_env_opts[@]}" \
|
||||||
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
||||||
"${SERVER_IMAGE}" --port "${PORT}")
|
"${SERVER_IMAGE}" --port "${PORT}")
|
||||||
|
|
||||||
|
|
@ -244,5 +440,12 @@ log "👉 API endpoint: http://localhost:${PORT}"
|
||||||
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
||||||
log "💻 To access the llama stack CLI, exec into the container:"
|
log "💻 To access the llama stack CLI, exec into the container:"
|
||||||
log " $ENGINE exec -ti llama-stack bash"
|
log " $ENGINE exec -ti llama-stack bash"
|
||||||
|
if [ "$WITH_TELEMETRY" = true ]; then
|
||||||
|
log "📡 Telemetry dashboards:"
|
||||||
|
log " Jaeger UI: http://localhost:16686"
|
||||||
|
log " Prometheus UI: http://localhost:9090"
|
||||||
|
log " Grafana UI: http://localhost:3000 (admin/admin)"
|
||||||
|
log " OTEL Collector: http://localhost:4318"
|
||||||
|
fi
|
||||||
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
||||||
log ""
|
log ""
|
||||||
|
|
|
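To verify the telemetry pipeline provisioned above end to end, a small OpenTelemetry SDK snippet can push a test span through the collector into Jaeger. This is a sketch, assuming the opentelemetry-sdk and opentelemetry-exporter-otlp-proto-http packages are installed on the host; port 4318 is the collector's published OTLP/HTTP receiver from the config above:

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Export to the collector's OTLP/HTTP receiver published on the host.
provider = TracerProvider(resource=Resource.create({"service.name": "llama-stack"}))
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces")))
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("telemetry-smoke-test"):
    pass  # the span should show up in Jaeger at http://localhost:16686

provider.shutdown()  # flush the batch processor before exiting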
||||||
|
|
@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
|
||||||
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
|
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# Basic inference tests with ollama
|
# Basic inference tests with ollama (server mode)
|
||||||
$0 --stack-config server:ci-tests --suite base --setup ollama
|
$0 --stack-config server:ci-tests --suite base --setup ollama
|
||||||
|
|
||||||
|
# Basic inference tests with docker
|
||||||
|
$0 --stack-config docker:ci-tests --suite base --setup ollama
|
||||||
|
|
||||||
# Multiple test directories with vllm
|
# Multiple test directories with vllm
|
||||||
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
|
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
|
||||||
|
|
||||||
|
|
@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
|
||||||
|
|
||||||
# Determine stack config type for api_recorder test isolation
|
# Determine stack config type for api_recorder test isolation
|
||||||
if [[ "$COLLECT_ONLY" == false ]]; then
|
if [[ "$COLLECT_ONLY" == false ]]; then
|
||||||
if [[ "$STACK_CONFIG" == server:* ]]; then
|
if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
|
||||||
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
|
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
|
||||||
echo "Setting stack config type: server"
|
echo "Setting stack config type: server"
|
||||||
else
|
else
|
||||||
|
|
@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
|
||||||
trap stop_server EXIT ERR INT TERM
|
trap stop_server EXIT ERR INT TERM
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Start Docker Container if needed
|
||||||
|
if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
||||||
|
stop_container() {
|
||||||
|
echo "Stopping Docker container..."
|
||||||
|
container_name="llama-stack-test-$DISTRO"
|
||||||
|
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
|
||||||
|
echo "Stopping and removing container: $container_name"
|
||||||
|
docker stop "$container_name" 2>/dev/null || true
|
||||||
|
docker rm "$container_name" 2>/dev/null || true
|
||||||
|
else
|
||||||
|
echo "No container named $container_name found"
|
||||||
|
fi
|
||||||
|
echo "Docker container stopped"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract distribution name from docker:distro format
|
||||||
|
DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')
|
||||||
|
export LLAMA_STACK_PORT=8321
|
||||||
|
|
||||||
|
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||||
|
# Set LLAMA_STACK_DIR to repo root
|
||||||
|
# USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
|
||||||
|
BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
|
||||||
|
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||||
|
echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
|
||||||
|
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||||
|
else
|
||||||
|
echo "Local mode: will mount source for live development"
|
||||||
|
fi
|
||||||
|
|
||||||
|
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
|
||||||
|
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
echo "❌ Failed to build Docker image"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "=== Starting Docker Container ==="
|
||||||
|
container_name="llama-stack-test-$DISTRO"
|
||||||
|
|
||||||
|
# Stop and remove existing container if it exists
|
||||||
|
docker stop "$container_name" 2>/dev/null || true
|
||||||
|
docker rm "$container_name" 2>/dev/null || true
|
||||||
|
|
||||||
|
# Build environment variables for docker run
|
||||||
|
DOCKER_ENV_VARS=""
|
||||||
|
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||||
|
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||||
|
|
||||||
|
# Pass through API keys if they exist
|
||||||
|
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||||
|
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||||
|
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||||
|
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||||
|
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||||
|
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||||
|
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||||
|
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||||
|
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||||
|
|
||||||
|
# Determine the actual image name (may have localhost/ prefix)
|
||||||
|
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||||
|
if [[ -z "$IMAGE_NAME" ]]; then
|
||||||
|
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Using image: $IMAGE_NAME"
|
||||||
|
|
||||||
|
docker run -d --network host --name "$container_name" \
|
||||||
|
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||||
|
$DOCKER_ENV_VARS \
|
||||||
|
-v $ROOT_DIR:/app/llama-stack-source \
|
||||||
|
"$IMAGE_NAME" \
|
||||||
|
--port $LLAMA_STACK_PORT
|
||||||
|
|
||||||
|
echo "Waiting for Docker container to start..."
|
||||||
|
for i in {1..30}; do
|
||||||
|
if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||||
|
echo "✅ Docker container started successfully"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if [[ $i -eq 30 ]]; then
|
||||||
|
echo "❌ Docker container failed to start"
|
||||||
|
echo "Container logs:"
|
||||||
|
docker logs "$container_name"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Update STACK_CONFIG to point to the running container
|
||||||
|
STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
|
||||||
|
|
||||||
|
trap stop_container EXIT ERR INT TERM
|
||||||
|
fi
|
||||||
|
|
||||||
# Run tests
|
# Run tests
|
||||||
echo "=== Running Integration Tests ==="
|
echo "=== Running Integration Tests ==="
|
||||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||||
|
|
|
||||||
|
|
@ -70,10 +70,15 @@ class BatchHelper:
|
||||||
):
|
):
|
||||||
"""Wait for a batch to reach a terminal status.
|
"""Wait for a batch to reach a terminal status.
|
||||||
|
|
||||||
|
Uses an exponential backoff polling strategy for efficient waiting:
|
||||||
|
- Starts with short intervals (0.1s) for fast batches (e.g., replay mode)
|
||||||
|
- Doubles the interval each iteration up to a maximum
|
||||||
|
- Adapts automatically to both fast and slow batch processing
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
batch_id: The batch ID to monitor
|
batch_id: The batch ID to monitor
|
||||||
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
|
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
|
||||||
sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
|
sleep_interval: If provided, uses fixed interval instead of exponential backoff
|
||||||
expected_statuses: Set of expected terminal statuses (default: {"completed"})
|
expected_statuses: Set of expected terminal statuses (default: {"completed"})
|
||||||
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
|
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
|
||||||
|
|
||||||
|
|
@ -84,10 +89,6 @@ class BatchHelper:
|
||||||
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
|
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
|
||||||
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
|
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
|
||||||
"""
|
"""
|
||||||
if sleep_interval is None:
|
|
||||||
# Default to 1/10th of max_wait_time, with min 1s and max 15s
|
|
||||||
sleep_interval = max(1, min(15, max_wait_time // 10))
|
|
||||||
|
|
||||||
if expected_statuses is None:
|
if expected_statuses is None:
|
||||||
expected_statuses = {"completed"}
|
expected_statuses = {"completed"}
|
||||||
|
|
||||||
|
|
@ -95,6 +96,15 @@ class BatchHelper:
|
||||||
unexpected_statuses = terminal_statuses - expected_statuses
|
unexpected_statuses = terminal_statuses - expected_statuses
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
|
# Use exponential backoff if no explicit sleep_interval provided
|
||||||
|
if sleep_interval is None:
|
||||||
|
current_interval = 0.1 # Start with 100ms
|
||||||
|
max_interval = 10.0 # Cap at 10 seconds
|
||||||
|
else:
|
||||||
|
current_interval = sleep_interval
|
||||||
|
max_interval = sleep_interval
|
||||||
|
|
||||||
while time.time() - start_time < max_wait_time:
|
while time.time() - start_time < max_wait_time:
|
||||||
current_batch = self.client.batches.retrieve(batch_id)
|
current_batch = self.client.batches.retrieve(batch_id)
|
||||||
|
|
||||||
|
|
@ -107,7 +117,11 @@ class BatchHelper:
|
||||||
else:
|
else:
|
||||||
pytest.fail(error_msg)
|
pytest.fail(error_msg)
|
||||||
|
|
||||||
time.sleep(sleep_interval)
|
time.sleep(current_interval)
|
||||||
|
|
||||||
|
# Exponential backoff: double the interval each time, up to max
|
||||||
|
if sleep_interval is None:
|
||||||
|
current_interval = min(current_interval * 2, max_interval)
|
||||||
|
|
||||||
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
|
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
|
||||||
if timeout_action == "skip":
|
if timeout_action == "skip":
|
||||||
|
|
|
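The backoff behaviour added above (start at 0.1s, double each iteration, cap at 10s) generalises to any polling helper; a minimal standalone sketch with illustrative names:

import time

def wait_with_backoff(check, max_wait_time: float = 60.0, start: float = 0.1, cap: float = 10.0):
    # Poll check() until it returns a truthy value, doubling the sleep after each attempt.
    deadline = time.time() + max_wait_time
    interval = start
    while time.time() < deadline:
        result = check()
        if result:
            return result
        time.sleep(interval)
        interval = min(interval * 2, cap)  # 0.1s, 0.2s, 0.4s, ... capped at 10s
    raise TimeoutError(f"condition not met within {max_wait_time} seconds")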
||||||
|
|
@ -0,0 +1,506 @@
|
||||||
|
{
|
||||||
|
"test_id": null,
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You are a helpful assistant"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What is 2 + 2?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "The answer to the equation 2 + 2 is 4."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Tell me a short joke"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 0,
|
||||||
|
"stream": true
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/chat/completions",
|
||||||
|
"model": "llama3.2:3b-instruct-fp16"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "Why",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " did",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " the",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " scare",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "crow",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " win",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " an",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " award",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "?\n\n",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "Because",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " he",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " was",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " outstanding",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " in",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " his",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": " field",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "!",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-ab1a32474062",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"delta": {
|
||||||
|
"content": "",
|
||||||
|
"function_call": null,
|
||||||
|
"refusal": null,
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"service_tier": null,
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
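Recordings like the streaming one above serialise each ChatCompletionChunk under __data__; a small sketch (illustrative helper, the file path is an assumption) that reassembles the assistant text from such a recording:

import json
from pathlib import Path

def replay_streamed_text(recording_path: Path) -> str:
    # Concatenate delta.content across the recorded chunks of a streaming response.
    data = json.loads(recording_path.read_text())
    if not data["response"]["is_streaming"]:
        raise ValueError("not a streaming recording")
    parts = []
    for chunk in data["response"]["body"]:
        for choice in chunk["__data__"]["choices"]:
            content = choice["delta"].get("content")
            if content:
                parts.append(content)
    return "".join(parts)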
|
@@ -0,0 +1,88 @@
{
  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/models",
    "headers": {},
    "body": {},
    "endpoint": "/v1/models",
    "model": ""
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "llama3.2:3b-instruct-fp16",
          "created": 1760453641,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "qwen3:4b",
          "created": 1757615302,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "gpt-oss:latest",
          "created": 1756395223,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "nomic-embed-text:latest",
          "created": 1756318548,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "llama3.2:3b",
          "created": 1755191039,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "all-minilm:l6-v2",
          "created": 1753968177,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "llama3.2:1b",
          "created": 1746124735,
          "object": "model",
          "owned_by": "library"
        }
      },
      {
        "__type__": "openai.types.model.Model",
        "__data__": {
          "id": "llama3.2:latest",
          "created": 1746044170,
          "object": "model",
          "owned_by": "library"
        }
      }
    ],
    "is_streaming": false
  }
}
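The recordings above and below all share one envelope: a "request" block, a "response.body" that stores each object as "__type__" (the fully qualified class name) plus "__data__" (its fields), and an "is_streaming" flag. Purely as an illustration of that envelope — this is not the actual replay code in llama_stack.testing.api_recorder, and load_recorded_body plus the file path are hypothetical — an entry from the model-list recording above could be re-hydrated like this:

# Illustrative sketch only: re-hydrating a "__type__"/"__data__" recording entry.
# NOT the real llama_stack.testing.api_recorder code; names and paths are hypothetical.
import importlib
import json
from typing import Any


def load_recorded_body(entry: dict[str, Any]) -> Any:
    """Rebuild an object such as openai.types.model.Model from a recorded entry."""
    module_path, _, class_name = entry["__type__"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls(**entry["__data__"])


with open("recording.json") as f:  # hypothetical recording file
    recording = json.load(f)

bodies = recording["response"]["body"]
if isinstance(bodies, list):  # model lists and streaming chunks are stored as lists
    objects = [load_recorded_body(item) for item in bodies]
else:  # non-streaming chat completions are stored as a single envelope
    objects = [load_recorded_body(bodies)]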
@@ -42,7 +42,9 @@ def pytest_sessionstart(session):

     # Set test stack config type for api_recorder test isolation
     stack_config = session.config.getoption("--stack-config", default=None)
-    if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
+    if stack_config and (
+        stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
+    ):
         os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
         logger.info(f"Test stack config type: server (stack_config={stack_config})")
     else:
@@ -139,7 +141,9 @@ def pytest_addoption(parser):
             a 'pointer' to the stack. this can be either be:
             (a) a template name like `starter`, or
             (b) a path to a run.yaml file, or
-            (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
+            (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
+            (d) a server config like `server:ci-tests`, or
+            (e) a docker config like `docker:ci-tests` (builds and runs container)
             """
         ),
     )
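With this change, any stack config that runs out of process — `server:`, the new `docker:`, or a raw http(s) URL — is classified as a server-type config for test isolation, while everything else stays in library-client mode. A minimal sketch of that dispatch, using a hypothetical classify_stack_config helper rather than the real conftest code:

# Minimal sketch of the prefix dispatch above; classify_stack_config is a
# hypothetical helper, not part of the real conftest.py.
def classify_stack_config(stack_config: str | None) -> str:
    """Out-of-process configs count as "server"; everything else runs in-process."""
    if stack_config and stack_config.startswith(("server:", "docker:", "http")):
        return "server"
    return "library_client"


assert classify_stack_config("docker:ci-tests") == "server"
assert classify_stack_config("server:ci-tests") == "server"
assert classify_stack_config("http://localhost:8321") == "server"
assert classify_stack_config("starter") == "library_client"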
 95  tests/integration/telemetry/conftest.py  Normal file
@@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry test configuration using OpenTelemetry SDK exporters.

This conftest provides in-memory telemetry collection for library_client mode only.
Tests using these fixtures should skip in server mode since the in-memory collector
cannot access spans from a separate server process.
"""

from typing import Any

import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace
import pytest
from opentelemetry import metrics, trace
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client


class TestCollector:
    def __init__(self, span_exp, metric_read):
        assert span_exp and metric_read
        self.span_exporter = span_exp
        self.metric_reader = metric_read

    def get_spans(self) -> tuple[ReadableSpan, ...]:
        return self.span_exporter.get_finished_spans()

    def get_metrics(self) -> Any | None:
        metrics = self.metric_reader.get_metrics_data()
        if metrics and metrics.resource_metrics:
            return metrics.resource_metrics[0].scope_metrics[0].metrics
        return None

    def clear(self) -> None:
        self.span_exporter.clear()
        self.metric_reader.get_metrics_data()


@pytest.fixture(scope="session")
def _telemetry_providers():
    """Set up in-memory OTEL providers before llama_stack_client initializes."""
    # Reset set-once flags to allow re-initialization
    if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
        otel_trace._TRACER_PROVIDER_SET_ONCE._done = False  # type: ignore
    if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
        otel_metrics._METER_PROVIDER_SET_ONCE._done = False  # type: ignore

    # Create in-memory exporters/readers
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    # Set module-level provider so TelemetryAdapter uses our in-memory providers
    telemetry_module._TRACER_PROVIDER = tracer_provider

    yield (span_exporter, metric_reader, tracer_provider, meter_provider)

    telemetry_module._TRACER_PROVIDER = None
    tracer_provider.shutdown()
    meter_provider.shutdown()


@pytest.fixture(scope="session")
def llama_stack_client(_telemetry_providers, request):
    """Override llama_stack_client to ensure in-memory telemetry providers are used."""
    patch_httpx_for_test_id()
    client = instantiate_llama_stack_client(request.session)

    return client


@pytest.fixture
def mock_otlp_collector(_telemetry_providers):
    """Provides access to telemetry data and clears between tests."""
    span_exporter, metric_reader, _, _ = _telemetry_providers
    collector = TestCollector(span_exporter, metric_reader)
    yield collector
    collector.clear()
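The recordings that follow feed the tests built on these fixtures (the real assertions live in tests/integration/telemetry/test_completions.py further down). For orientation, a minimal test consuming mock_otlp_collector might look like this sketch:

# Minimal sketch of a test using the fixtures above; the actual assertions
# are in tests/integration/telemetry/test_completions.py below.
def test_spans_are_captured(mock_otlp_collector, llama_stack_client, text_model_id):
    llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "hello"}],
        stream=False,
    )
    spans = mock_otlp_collector.get_spans()
    assert spans, "expected the in-memory exporter to have captured at least one span"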
@@ -0,0 +1,57 @@
{
  "test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Test OpenAI telemetry creation"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-0de60cd6a6ec",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n # Create a new user entity\n user_entity = {\n 'id': 'user-123',\n 'name': 'John Doe',\n 'email': 'john.doe@example.com'\n }\n \n # Send the user creation request to OpenAI\n response = api.users.create(user_entity)\n print(f\"User created: {response}\")\n\ndef create_transaction():\n # Create a new transaction entity\n transaction_entity = {\n 'id': 'tran-123',\n 'user_id': 'user-123',\n 'transaction_type': 'query'\n }\n \n # Send the transaction creation request to OpenAI\n response = api.transactions.create(transaction_entity)\n print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n # Create a new telemetry event entity\n telemetry_event_entity = {\n 'id': 'telem-123',\n 'transaction_id': 'tran-123',\n 'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n }\n \n # Send the telemetry data to OpenAI\n response = api.telemetry.create(telemetry_event_entity)\n print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 460,
          "prompt_tokens": 30,
          "total_tokens": 490,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
@@ -0,0 +1,59 @@
{
  "test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Test trace openai with temperature 0.7"
        }
      ],
      "max_tokens": 100,
      "stream": false,
      "temperature": 0.7
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-1fcfd86d8111",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 100,
          "prompt_tokens": 35,
          "total_tokens": 135,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
 4211  tests/integration/telemetry/recordings/d45c9a9229e7e3f50a6eac139508babe21988649eb321b562f74061f58593c25.json  generated  Normal file
File diff suppressed because it is too large.
 4263  tests/integration/telemetry/recordings/db8ffad4840512348c215005128557807ffbed0cf6bf11a52c1d1009878886ef.json  generated  Normal file
File diff suppressed because it is too large.
@@ -0,0 +1,59 @@
{
  "test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Test trace openai with temperature 0.7"
        }
      ],
      "max_tokens": 100,
      "stream": false,
      "temperature": 0.7
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-dba5042d6691",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 100,
          "prompt_tokens": 35,
          "total_tokens": 135,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
 112  tests/integration/telemetry/test_completions.py  Normal file
@@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""

import json
import os

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
    reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)


def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
    """Verify streaming adds chunk_count and __type__=async_generator."""

    stream = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai 1"}],
        stream=True,
    )

    chunks = list(stream)
    assert len(chunks) > 0

    spans = mock_otlp_collector.get_spans()
    assert len(spans) > 0

    chunk_count = None
    for span in spans:
        if span.attributes.get("__type__") == "async_generator":
            chunk_count = span.attributes.get("chunk_count")
            if chunk_count:
                chunk_count = int(chunk_count)
            break

    assert chunk_count is not None
    assert chunk_count == len(chunks)


def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
    """Comprehensive validation of telemetry data format including spans and metrics."""
    response = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
        temperature=0.7,
        max_tokens=100,
        stream=False,
    )

    # Handle both dict and Pydantic model for usage
    # This occurs due to the replay system returning a dict for usage, but the client returning a Pydantic model
    # TODO: Fix this by making the replay system return a Pydantic model for usage
    usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
    assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
    assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
    assert usage.get("total_tokens") and usage["total_tokens"] > 0

    # Verify spans
    spans = mock_otlp_collector.get_spans()
    assert len(spans) == 5

    # we only need this captured one time
    logged_model_id = None

    for span in spans:
        attrs = span.attributes
        assert attrs is not None

        # Root span is created manually by tracing middleware, not by @trace_protocol decorator
        is_root_span = attrs.get("__root__") is True

        if is_root_span:
            # Root spans have different attributes
            assert attrs.get("__location__") in ["library_client", "server"]
        else:
            # Non-root spans are created by @trace_protocol decorator
            assert attrs.get("__autotraced__")
            assert attrs.get("__class__") and attrs.get("__method__")
            assert attrs.get("__type__") in ["async", "sync", "async_generator"]

            args = json.loads(attrs["__args__"])
            if "model_id" in args:
                logged_model_id = args["model_id"]

    assert logged_model_id is not None
    assert logged_model_id == text_model_id

    # TODO: re-enable this once metrics get fixed
    """
    # Verify token usage metrics in response
    metrics = mock_otlp_collector.get_metrics()

    assert metrics
    for metric in metrics:
        assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
        assert metric.unit == "tokens"
        assert metric.data.data_points and len(metric.data.data_points) == 1
        match metric.name:
            case "completion_tokens":
                assert metric.data.data_points[0].value == usage["completion_tokens"]
            case "total_tokens":
                assert metric.data.data_points[0].value == usage["total_tokens"]
            case "prompt_tokens":
                assert metric.data.data_points[0].value == usage["prompt_tokens"]
    """
 50  tests/unit/distribution/test_stack_list_deps.py  Normal file
@@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
from io import StringIO
from unittest.mock import patch

from llama_stack.cli.stack._list_deps import (
    run_stack_list_deps_command,
)


def test_stack_list_deps_basic():
    args = argparse.Namespace(
        config=None,
        env_name="test-env",
        providers="inference=remote::ollama",
        format="deps-only",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    # deps-only format should NOT include "uv pip install" or "Dependencies for"
    assert "uv pip install" not in output
    assert "Dependencies for" not in output

    # Check that expected dependencies are present
    assert "ollama" in output
    assert "aiohttp" in output
    assert "fastapi" in output


def test_stack_list_deps_with_distro_uv():
    args = argparse.Namespace(
        config="starter",
        env_name=None,
        providers=None,
        format="uv",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    assert "uv pip install" in output
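The assertions above pin down the output contract of the two formats: deps-only prints bare package specifiers with no banner or install command, while uv prefixes the dependency list with "uv pip install". As an illustration only — format_deps is a hypothetical stand-in, not the real llama_stack.cli.stack._list_deps code — that contract could be sketched as:

# Illustrative sketch of the output contract asserted above; format_deps is a
# hypothetical function, not the real list-deps implementation.
def format_deps(deps: list[str], fmt: str) -> str:
    if fmt == "deps-only":
        # bare package specifiers, one per line, no banner and no install command
        return "\n".join(deps)
    if fmt == "uv":
        # a ready-to-run install command line
        return "uv pip install " + " ".join(deps)
    raise ValueError(f"unknown format: {fmt}")


assert "uv pip install" not in format_deps(["ollama", "aiohttp", "fastapi"], "deps-only")
assert format_deps(["ollama", "aiohttp", "fastapi"], "uv").startswith("uv pip install")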
 38  uv.lock  generated
@@ -4129,27 +4129,27 @@ wheels = [

 [[package]]
 name = "ruff"
-version = "0.12.5"
+version = "0.9.10"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/30/cd/01015eb5034605fd98d829c5839ec2c6b4582b479707f7c1c2af861e8258/ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e", size = 5170722, upload-time = "2025-07-24T13:26:37.456Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d4/de/ad2f68f0798ff15dd8c0bcc2889558970d9a685b3249565a937cd820ad34/ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92", size = 11819133, upload-time = "2025-07-24T13:25:56.369Z" },
+    { url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/fc/c6b65cd0e7fbe60f17e7ad619dca796aa49fbca34bb9bea5f8faf1ec2643/ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a", size = 12501114, upload-time = "2025-07-24T13:25:59.471Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/de/c6bec1dce5ead9f9e6a946ea15e8d698c35f19edc508289d70a577921b30/ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf", size = 11716873, upload-time = "2025-07-24T13:26:01.496Z" },
+    { url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/16/cf372d2ebe91e4eb5b82a2275c3acfa879e0566a7ac94d331ea37b765ac8/ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73", size = 11958829, upload-time = "2025-07-24T13:26:03.721Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" },
-    { url = "https://files.pythonhosted.org/packages/25/bf/cd07e8f6a3a6ec746c62556b4c4b79eeb9b0328b362bb8431b7b8afd3856/ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac", size = 11626619, upload-time = "2025-07-24T13:26:06.118Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/c9/c2ccb3b8cbb5661ffda6925f81a13edbb786e623876141b04919d1128370/ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08", size = 13221894, upload-time = "2025-07-24T13:26:08.292Z" },
+    { url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/58/68a5be2c8e5590ecdad922b2bcd5583af19ba648f7648f95c51c3c1eca81/ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4", size = 14163909, upload-time = "2025-07-24T13:26:10.474Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/d1/ef6b19622009ba8386fdb792c0743f709cf917b0b2f1400589cbe4739a33/ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b", size = 13583652, upload-time = "2025-07-24T13:26:13.381Z" },
+    { url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" },
-    { url = "https://files.pythonhosted.org/packages/62/e3/1c98c566fe6809a0c83751d825a03727f242cdbe0d142c9e292725585521/ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a", size = 12700451, upload-time = "2025-07-24T13:26:15.488Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" },
-    { url = "https://files.pythonhosted.org/packages/24/ff/96058f6506aac0fbc0d0fc0d60b0d0bd746240a0594657a2d94ad28033ba/ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a", size = 12937465, upload-time = "2025-07-24T13:26:17.808Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/d3/68bc5e7ab96c94b3589d1789f2dd6dd4b27b263310019529ac9be1e8f31b/ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5", size = 11771136, upload-time = "2025-07-24T13:26:20.422Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" },
-    { url = "https://files.pythonhosted.org/packages/52/75/7356af30a14584981cabfefcf6106dea98cec9a7af4acb5daaf4b114845f/ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293", size = 11601644, upload-time = "2025-07-24T13:26:22.928Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/67/91c71d27205871737cae11025ee2b098f512104e26ffd8656fd93d0ada0a/ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb", size = 12478068, upload-time = "2025-07-24T13:26:26.134Z" },
+    { url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" },
-    { url = "https://files.pythonhosted.org/packages/34/04/b6b00383cf2f48e8e78e14eb258942fdf2a9bf0287fbf5cdd398b749193a/ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb", size = 12991537, upload-time = "2025-07-24T13:26:28.533Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/b9/053d6445dc7544fb6594785056d8ece61daae7214859ada4a152ad56b6e0/ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9", size = 11751575, upload-time = "2025-07-24T13:26:30.835Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/0f/ab16e8259493137598b9149734fec2e06fdeda9837e6f634f5c4e35916da/ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5", size = 12882273, upload-time = "2025-07-24T13:26:32.929Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" },
-    { url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564, upload-time = "2025-07-24T13:26:34.994Z" },
+    { url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" },
 ]

 [[package]]