Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-12 04:00:42 +00:00

commit 490b212576: Merge remote-tracking branch 'origin/main' into stores

89 changed files with 19353 additions and 8323 deletions
19 .dockerignore Normal file

@@ -0,0 +1,19 @@
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.so
.git
.gitignore
htmlcov*
.coverage
coverage*
.cache
.mypy_cache
.pytest_cache
.ruff_cache
uv.lock
node_modules
build
/tmp
@@ -57,7 +57,7 @@ runs:
echo "Building Llama Stack"

LLAMA_STACK_DIR=. \
uv run --no-sync llama stack build --template ci-tests --image-type venv
uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

- name: Configure git for commits
shell: bash
1 .github/workflows/README.md vendored

@@ -14,6 +14,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
| Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
7 .github/workflows/install-script-ci.yml vendored

@@ -30,8 +30,11 @@ jobs:

- name: Build a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
llama stack build --template starter --image-type container --image-name test
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=starter \
--tag llama-stack:starter-ci

- name: Run installer end-to-end
run: |
4 .github/workflows/integration-tests.yml vendored

@@ -47,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
client-type: [library, server]
client-type: [library, server, docker]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}

@@ -82,7 +82,7 @@ jobs:
env:
OPENAI_API_KEY: dummy
with:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
setup: ${{ matrix.config.setup }}
inference-mode: 'replay'
suite: ${{ matrix.config.suite }}
@@ -144,7 +144,7 @@ jobs:

- name: Build Llama Stack
run: |
uv run --no-sync llama stack build --template ci-tests --image-type venv
uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
57 .github/workflows/providers-build.yml vendored

@@ -14,6 +14,8 @@ on:
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'

pull_request:
paths:

@@ -24,6 +26,8 @@ on:
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'

concurrency:
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}

@@ -60,15 +64,19 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Print build dependencies
- name: Install distribution into venv
if: matrix.image-type == 'venv'
run: |
uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

- name: Run Llama Stack Build
- name: Build container image
if: matrix.image-type == 'container'
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=${{ matrix.distro }} \
--tag llama-stack:${{ matrix.distro }}-ci

- name: Print dependencies in the image
if: matrix.image-type == 'venv'

@@ -86,8 +94,8 @@ jobs:

- name: Build a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama

uv pip install -e .
uv run --no-sync llama stack list-deps --providers inference=remote::ollama | xargs -L1 uv pip install
build-custom-container-distribution:
runs-on: ubuntu-latest
steps:

@@ -97,11 +105,16 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Build a single provider
- name: Build container image
run: |
yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests

- name: Inspect the container image entrypoint
run: |

@@ -112,7 +125,7 @@ jobs:
fi
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
echo "Entrypoint is not correct"
exit 1
fi

@@ -129,17 +142,19 @@ jobs:
- name: Pin distribution to UBI9 base
run: |
yq -i '
.image_type = "container" |
.image_name = "ubi9-test" |
.distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
' llama_stack/distributions/ci-tests/build.yaml

- name: Build dev container (UBI9)
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
- name: Build UBI9 container image
run: |
uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests-ubi9

- name: Inspect UBI9 image
run: |

@@ -150,7 +165,7 @@ jobs:
fi
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
echo "Entrypoint is not correct"
exit 1
fi
105 .github/workflows/providers-list-deps.yml vendored Normal file

@@ -0,0 +1,105 @@
name: Test llama stack list-deps

run-name: Test llama stack list-deps

on:
push:
branches:
- main
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'

pull_request:
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
distros: ${{ steps.set-matrix.outputs.distros }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Generate Distribution List
id: set-matrix
run: |
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"

list-deps:
needs: generate-matrix
runs-on: ubuntu-latest
strategy:
matrix:
distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
image-type: [venv, container]
fail-fast: false # We want to run all jobs even if some fail

steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Print dependencies
run: |
uv run llama stack list-deps ${{ matrix.distro }}

- name: Install Distro using llama stack list-deps
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
uv pip list

show-single-provider:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Show a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps --providers inference=remote::ollama

list-deps-from-config:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: list-deps from Config
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
run: |
uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
@@ -46,9 +46,9 @@ jobs:
yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
cat tests/external/ramalama-stack/run.yaml

- name: Build distro from config file
- name: Install distribution dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
uv run llama stack list-deps tests/external/ramalama-stack/build.yaml | xargs -L1 uv pip install

- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'
7 .github/workflows/test-external.yml vendored

@@ -44,11 +44,14 @@ jobs:

- name: Print distro dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
uv run --no-sync llama stack list-deps tests/external/build.yaml

- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
uv venv ci-test
source ci-test/bin/activate
uv pip install -e .
LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install

- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'
@@ -167,9 +167,9 @@ under the LICENSE file in the root directory of this source tree.

Some tips about common tasks you work on while contributing to Llama Stack:

### Using `llama stack build`
### Installing dependencies of distributions

Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
When installing dependencies for a distribution, you can use `llama stack list-deps` to view and install the required packages.

Example:
```bash

@@ -177,7 +177,12 @@ cd work/
git clone https://github.com/llamastack/llama-stack.git
git clone https://github.com/llamastack/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>

# Show dependencies for a distribution
llama stack list-deps <distro-name>

# Install dependencies
llama stack list-deps <distro-name> | xargs -L1 uv pip install
```

### Updating distribution configurations
@@ -27,8 +27,11 @@ MODEL="Llama-4-Scout-17B-16E-Instruct"
# get meta url from llama.com
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

# install dependencies for the distribution
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install

# start a llama stack server
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu

# install client to interact with the server
pip install llama-stack-client

@@ -89,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
To try Llama Stack locally, run:

```bash
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
```

### Overview
136 containers/Containerfile Normal file

@@ -0,0 +1,136 @@
# syntax=docker/dockerfile:1.6
#
# This Dockerfile is used to build the Llama Stack container image.
# Example:
# docker build \
#   -f containers/Containerfile \
#   --build-arg DISTRO_NAME=starter \
#   --tag llama-stack:starter .

ARG BASE_IMAGE=python:3.12-slim
FROM ${BASE_IMAGE}

ARG INSTALL_MODE="pypi"
ARG LLAMA_STACK_DIR="/workspace"
ARG LLAMA_STACK_CLIENT_DIR=""
ARG PYPI_VERSION=""
ARG TEST_PYPI_VERSION=""
ARG KEEP_WORKSPACE=""
ARG DISTRO_NAME="starter"
ARG RUN_CONFIG_PATH=""

ARG UV_HTTP_TIMEOUT=500
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app

RUN set -eux; \
    if command -v dnf >/dev/null 2>&1; then \
        dnf -y update && \
        dnf install -y iputils git net-tools wget \
            vim-minimal python3.12 python3.12-pip python3.12-wheel \
            python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
        ln -sf /usr/bin/pip3.12 /usr/local/bin/pip && \
        ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
        dnf clean all; \
    elif command -v apt-get >/dev/null 2>&1; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            iputils-ping net-tools iproute2 dnsutils telnet \
            curl wget git procps psmisc lsof traceroute bubblewrap \
            gcc g++ && \
        rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Unsupported base image: expected dnf or apt-get" >&2; \
        exit 1; \
    fi

RUN pip install --no-cache-dir uv
ENV UV_SYSTEM_PYTHON=1

ENV INSTALL_MODE=${INSTALL_MODE}
ENV LLAMA_STACK_DIR=${LLAMA_STACK_DIR}
ENV LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR}
ENV PYPI_VERSION=${PYPI_VERSION}
ENV TEST_PYPI_VERSION=${TEST_PYPI_VERSION}
ENV KEEP_WORKSPACE=${KEEP_WORKSPACE}
ENV DISTRO_NAME=${DISTRO_NAME}
ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}

# Copy the repository so editable installs and run configurations are available.
COPY . /workspace

# Install llama-stack
RUN set -eux; \
    if [ "$INSTALL_MODE" = "editable" ]; then \
        if [ ! -d "$LLAMA_STACK_DIR" ]; then \
            echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
    elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
        uv pip install --no-cache-dir fastapi libcst; \
        if [ -n "$TEST_PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
        fi; \
    else \
        if [ -n "$PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir llama-stack; \
        fi; \
    fi;

# Install the client package if it is provided
RUN set -eux; \
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
    fi;

# Install the dependencies for the distribution
RUN set -eux; \
    if [ -z "$DISTRO_NAME" ]; then \
        echo "DISTRO_NAME must be provided" >&2; \
        exit 1; \
    fi; \
    deps="$(llama stack list-deps "$DISTRO_NAME")"; \
    if [ -n "$deps" ]; then \
        printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
    fi

# Cleanup
RUN set -eux; \
    pip uninstall -y uv; \
    should_remove=1; \
    if [ -n "$KEEP_WORKSPACE" ]; then should_remove=0; fi; \
    if [ "$INSTALL_MODE" = "editable" ]; then should_remove=0; fi; \
    case "$RUN_CONFIG_PATH" in \
        /workspace*) should_remove=0 ;; \
    esac; \
    if [ "$should_remove" -eq 1 ] && [ -d /workspace ]; then rm -rf /workspace; fi

RUN cat <<'EOF' >/usr/local/bin/llama-stack-entrypoint.sh
#!/bin/sh
set -e

if [ -n "$RUN_CONFIG_PATH" ] && [ -f "$RUN_CONFIG_PATH" ]; then
    exec llama stack run "$RUN_CONFIG_PATH" "$@"
fi

if [ -n "$DISTRO_NAME" ]; then
    exec llama stack run "$DISTRO_NAME" "$@"
fi

exec llama stack run "$@"
EOF
RUN chmod +x /usr/local/bin/llama-stack-entrypoint.sh

RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache

ENTRYPOINT ["/usr/local/bin/llama-stack-entrypoint.sh"]
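A quick way to read the entrypoint logic above: if a run config was baked into the image it is used, otherwise the baked distribution name is passed to `llama stack run`, and any container arguments are appended to that command. The sketch below is a hypothetical invocation of an image built with the example arguments from the header comment (`DISTRO_NAME=starter`, tag `llama-stack:starter`); adjust the tag, port, and volume mount to your setup.

```bash
# Build the image as shown in the header comment of the Containerfile
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter

# Run it: the entrypoint resolves to `llama stack run starter`,
# and the extra `--port` argument is appended to that command
docker run \
  -p 8321:8321 \
  -v ~/.llama:/root/.llama \
  llama-stack:starter \
  --port 8321
```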
@@ -51,8 +51,8 @@ device: cpu
You can access the HuggingFace trainer via the `starter` distribution:

```bash
llama stack build --distro starter --image-type venv
llama stack run ~/.llama/distributions/starter/starter-run.yaml
llama stack list-deps starter | xargs -L1 uv pip install
llama stack run starter
```

### Usage Example
@@ -175,8 +175,7 @@ llama-stack-client benchmarks register \
**1. Start the Llama Stack API Server**

```bash
# Build and run a distribution (example: together)
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

@@ -209,7 +208,7 @@ The playground works with any Llama Stack distribution. Popular options include:
<TabItem value="together" label="Together AI">

```bash
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

@@ -222,7 +221,7 @@ llama stack run together
<TabItem value="ollama" label="Ollama (Local)">

```bash
llama stack build --distro ollama --image-type venv
llama stack list-deps ollama | xargs -L1 uv pip install
llama stack run ollama
```

@@ -235,7 +234,7 @@ llama stack run ollama
<TabItem value="meta-reference" label="Meta Reference">

```bash
llama stack build --distro meta-reference --image-type venv
llama stack list-deps meta-reference | xargs -L1 uv pip install
llama stack run meta-reference
```
@@ -20,7 +20,8 @@ RAG enables your applications to reference and recall information from external
In one terminal, start the Llama Stack server:

```bash
uv run llama stack build --distro starter --image-type venv --run
llama stack list-deps starter | xargs -L1 uv pip install
llama stack run starter
```

### 2. Connect with OpenAI Client
@@ -62,6 +62,10 @@ The new `/v2` API must be introduced alongside the existing `/v1` API and run in

When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.

### Deprecated APIs

Deprecated APIs are those that are no longer actively maintained or supported. Deprecated APIs are marked with the flag `deprecated = True` in the OpenAPI spec. These APIs will be removed in a future release.

### API Stability vs. Provider Stability

The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.
@@ -158,17 +158,16 @@ under the LICENSE file in the root directory of this source tree.

Some tips about common tasks you work on while contributing to Llama Stack:

### Using `llama stack build`
### Setup for development

Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.

Example:
```bash
cd work/
git clone https://github.com/meta-llama/llama-stack.git
git clone https://github.com/meta-llama/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
uv run llama stack list-deps <distro-name> | xargs -L1 uv pip install

# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package.
git clone https://github.com/meta-llama/llama-stack-client-python.git
uv add --editable ../llama-stack-client-python
```

### Updating distribution configurations
@@ -67,7 +67,7 @@ def get_base_url(self) -> str:

## Testing the Provider

Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.

### 1. Integration Testing
@@ -5,225 +5,79 @@ sidebar_label: Build your own Distribution
sidebar_position: 3
---

This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
This guide walks you through inspecting existing distributions, customising their configuration, and building runnable artefacts for your own deployment.

### Explore existing distributions

### Setting your log level
All first-party distributions live under `llama_stack/distributions/`. Each directory contains:

In order to specify the proper logging level users can apply the following environment variable `LLAMA_STACK_LOGGING` with the following format:
- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
- `run.yaml` – sample run configuration (when provided).
- Documentation fragments that power this site.

`LLAMA_STACK_LOGGING=server=debug;core=info`

Where each category in the following list:

- all
- core
- server
- router
- inference
- agents
- safety
- eval
- tools
- client

Can be set to any of the following log levels:

- debug
- info
- warning
- error
- critical

The default global log level is `info`. `all` sets the log level for all components.

A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`

### Llama Stack Build

In order to build your own distribution, we recommend you clone the `llama-stack` repository.

```
git clone git@github.com:meta-llama/llama-stack.git
cd llama-stack
pip install -e .
```
Use the CLI to build your distribution.
The main points to consider are:
1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
3. **Config** - Do you want to use a pre-existing config file to build your distribution?

```
llama stack build -h
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only]
[--run] [--providers PROVIDERS]

Build a Llama stack container

options:
-h, --help show this help message and exit
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to
enter information interactively (default: None)
--template TEMPLATE (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default:
None)
--distro DISTRIBUTION, --distribution DISTRIBUTION
Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
--list-distros, --list-distributions
Show the available distributions for building a Llama Stack distribution (default: False)
--image-type {container,venv}
Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
--image-name IMAGE_NAME
[for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default:
None)
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
--providers PROVIDERS
Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per
API. (default: None)
```

After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
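For instance, a minimal sketch of that copy-and-rename flow (the `my-distro` name here is just a placeholder):

```bash
# Start from an existing first-party distribution
cp -r llama_stack/distributions/starter llama_stack/distributions/my-distro

# Adjust the spec and the sample run config for the new stack
$EDITOR llama_stack/distributions/my-distro/build.yaml
$EDITOR llama_stack/distributions/my-distro/run.yaml
```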
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<Tabs>
<TabItem value="template" label="Building from a template">
To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
<TabItem value="container" label="Building a container">

The following command will allow you to see the available templates and their corresponding providers.
```
llama stack build --list-templates
Use the Containerfile at `containers/Containerfile`, which installs `llama-stack`, resolves distribution dependencies via `llama stack list-deps`, and sets the entrypoint to `llama stack run`.

```bash
docker build . \
-f containers/Containerfile \
--build-arg DISTRO_NAME=starter \
--tag llama-stack:starter
```

```
------------------------------+-----------------------------------------------------------------------------+
| Template Name | Description |
+------------------------------+-----------------------------------------------------------------------------+
| watsonx | Use watsonx for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| vllm-gpu | Use a built-in vLLM engine for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| together | Use Together.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| tgi | Use (an external) TGI server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| starter | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| sambanova | Use SambaNova for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
| remote-vllm | Use (an external) vLLM server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| postgres-demo | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| passthrough | Use Passthrough hosted llama-stack endpoint for LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| open-benchmark | Distribution for running open benchmarks |
+------------------------------+-----------------------------------------------------------------------------+
| ollama | Use (an external) Ollama server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| nvidia | Use NVIDIA NIM for running LLM inference, evaluation and safety |
+------------------------------+-----------------------------------------------------------------------------+
| meta-reference-gpu | Use Meta Reference for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| llama_api | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| groq | Use Groq for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| fireworks | Use Fireworks.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| experimental-post-training | Experimental template for post training |
+------------------------------+-----------------------------------------------------------------------------+
| dell | Dell's distribution of Llama Stack. TGI inference via Dell's custom |
| | container |
+------------------------------+-----------------------------------------------------------------------------+
| ci-tests | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| cerebras | Use Cerebras for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| bedrock | Use AWS Bedrock for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
```
Handy build arguments:

You may then pick a template to build your distribution with providers fitted to your liking.
- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
- `KEEP_WORKSPACE=1` – retain `/workspace` in the final image if you need to access additional files (such as sample configs or provider bundles).
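Putting several of these together, a hedged example of an editable build that bakes in the ci-tests run config (it mirrors the CI workflow above; the image tag is arbitrary, and paths assume you build from the repository root):

```bash
docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg LLAMA_STACK_DIR=/workspace \
  --build-arg DISTRO_NAME=ci-tests \
  --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
  --tag llama-stack:ci-tests
```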
For example, to build a distribution with TGI as the inference provider, you can run:
```
$ llama stack build --distro starter
...
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
```
Make sure any custom `build.yaml`, run configs, or provider directories you reference are included in the Docker build context so the Containerfile can read them.

```{tip}
The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
```
</TabItem>
<TabItem value="scratch" label="Building from Scratch">
<TabItem value="external" label="Building with external providers">

If the provided templates do not fit your use case, you can start off by running `llama stack build`, which will take you through an interactive wizard where you will be prompted to enter build configurations.
External providers live outside the main repository but can be bundled by pointing `external_providers_dir` to a directory that contains your provider packages.

It would be best to start with a template and understand the structure of the config file and the various concepts (APIs, providers, resources, etc.) before starting from scratch.
```
llama stack build
1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
2. Update `build.yaml` with the directory and provider entries.
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
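A minimal sketch of step 1 plus the build, with placeholder paths (`path/to/providers`, and a `run.yaml` at the repository root); combine it with the build arguments listed in the container tab above as needed:

```bash
# Copy external providers into the build context so `COPY . /workspace` picks them up
cp -R path/to/providers providers.d

# Bake the run config (which lands at /workspace/run.yaml inside the image) into the build
docker build . \
  -f containers/Containerfile \
  --build-arg RUN_CONFIG_PATH=/workspace/run.yaml \
  --tag llama-stack:external-providers
```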
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
> Enter the image type you want your Llama Stack to be built as (container or venv): venv

Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.

Tip: use <TAB> to see options for the providers.

> Enter provider for API inference: inline::meta-reference
> Enter provider for API safety: inline::llama-guard
> Enter provider for API agents: inline::meta-reference
> Enter provider for API memory: inline::faiss
> Enter provider for API datasetio: inline::meta-reference
> Enter provider for API scoring: inline::meta-reference
> Enter provider for API eval: inline::meta-reference
> Enter provider for API telemetry: inline::meta-reference

> (Optional) Enter a short description for your Llama Stack:

You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
```
</TabItem>
<TabItem value="config" label="Building from a pre-existing build config file">
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.

- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.

```
llama stack build --config llama_stack/distributions/starter/build.yaml
```
</TabItem>
<TabItem value="external" label="Building with External Providers">

Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.

To build a distribution with external providers, you need to:

1. Configure the `external_providers_dir` in your build configuration file:
Example `build.yaml` excerpt for a custom Ollama provider:

```yaml
# Example my-external-stack.yaml with external providers
version: '2'
distribution_spec:
  description: Custom distro for CI tests
  providers:
    inference:
      - remote::custom_ollama
# Add more providers as needed
image_type: container
image_name: ci-test
# Path to external provider implementations
external_providers_dir: ~/.llama/providers.d
external_providers_dir: /workspace/providers.d
```

Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

```python
from llama_stack.providers.datatypes import ProviderSpec


def get_provider_spec() -> ProviderSpec:
    return ProviderSpec(
        provider_type="remote::custom_ollama",
        module="llama_stack_ollama_provider",
        config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
        pip_packages=[
            "ollama",
            "aiohttp",
            "llama-stack-provider-ollama",
        ],
    )
```

Here's an example for a custom Ollama provider:
@@ -245,53 +99,22 @@ The `pip_packages` section lists the Python packages required by the provider, a
provider package itself. The package must be available on PyPI or can be provided from a local
directory or a git repository (git must be installed on the build environment).

2. Build your distribution using the config file:
For deeper guidance, see the [External Providers documentation](../providers/external/).

```
llama stack build --config my-external-stack.yaml
```

For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external/).
</TabItem>
<TabItem value="container" label="Building Container">
</Tabs>

:::tip Podman Alternative
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
:::
### Run your stack server

To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.

```
llama stack build --distro starter --image-type container
```

```
$ llama stack build --distro starter --image-type container
...
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
...
```

You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
```

Now set some environment variables for the inference model ID and Llama Stack Port and create a local directory to mount into the container's file system.
After building the image, launch it directly with Docker or Podman—the entrypoint calls `llama stack run` using the baked distribution or the bundled run config:

```bash
export INFERENCE_MODEL="llama3.2:3b"
export LLAMA_STACK_PORT=8321
mkdir -p ~/.llama
```

After this step is successful, you should be able to find the built container image and test it with the below Docker command:

```
docker run -d \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-e INFERENCE_MODEL=$INFERENCE_MODEL \
-e OLLAMA_URL=http://host.docker.internal:11434 \
localhost/distribution-ollama:dev \
llama-stack:starter \
--port $LLAMA_STACK_PORT
```
@@ -311,131 +134,14 @@ Here are the docker flags and their uses:

* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on

</TabItem>
</Tabs>

### Running your Stack server
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack build` step.
If you prepared a custom run config, mount it into the container and reference it explicitly:

```bash
docker run \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $(pwd)/run.yaml:/app/run.yaml \
llama-stack:starter \
/app/run.yaml
```
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
[--image-type {venv}] [--enable-ui]
[config | distro]

Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

positional arguments:
config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

options:
-h, --help show this help message and exit
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
--image-name IMAGE_NAME
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--image-type {venv}
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--enable-ui Start the UI server (default: False)
```

**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.

```
# Start using template name
llama stack run tgi

# Start using config file
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```

```
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

Serving API inspect
GET /health
GET /providers/list
GET /routes/list
Serving API inference
POST /inference/chat_completion
POST /inference/completion
POST /inference/embeddings
...
Serving API agents
POST /agents/create
POST /agents/session/create
POST /agents/turn/create
POST /agents/delete
POST /agents/session/delete
POST /agents/session/get
POST /agents/step/get
POST /agents/turn/get

Listening on ['::', '0.0.0.0']:8321
INFO: Started server process [2935911]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```

### Listing Distributions
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.

```
llama stack list -h
usage: llama stack list [-h]

list the build stacks

options:
-h, --help show this help message and exit
```

Example Usage

```
llama stack list
```

```
------------------------------+-----------------------------------------------------------------+--------------+------------+
| Stack Name | Path | Build Config | Run Config |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| together | ~/.llama/distributions/together | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| bedrock | ~/.llama/distributions/bedrock | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| starter | ~/.llama/distributions/starter | Yes | Yes |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| remote-vllm | ~/.llama/distributions/remote-vllm | Yes | Yes |
+------------------------------+-----------------------------------------------------------------------------+--------------+
```

### Removing a Distribution
Use the remove command to delete a distribution you've previously built.

```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]

Remove the build stack

positional arguments:
name Name of the stack to delete (default: None)

options:
-h, --help show this help message and exit
--all, -a Delete all stacks (use with caution) (default: False)
```

Example
```
llama stack rm llamastack-test
```

To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.

### Troubleshooting

If you encounter any issues, ask questions in our discord or search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file a new issue.
@@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.
```bash
# setup
uv pip install llama-stack
llama stack build --distro starter --image-type venv
llama stack list-deps starter | xargs -L1 uv pip install
```

```python
@@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
uv venv starter --python 3.12
source starter/bin/activate # On Windows: starter\Scripts\activate
pip install --no-cache llama-stack==0.2.2
llama stack build --distro starter --image-type venv
llama stack list-deps starter | xargs -L1 uv pip install
export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run starter --port 5050
```
@@ -166,10 +166,10 @@ docker run \

### Via venv

Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
Install the distribution dependencies before launching:

```bash
llama stack build --distro dell --image-type venv
llama stack list-deps dell | xargs -L1 uv pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \
@@ -81,10 +81,10 @@ docker run \

### Via venv

Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
Make sure you have the Llama Stack CLI available.

```bash
llama stack build --distro meta-reference-gpu --image-type venv
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/meta-reference-gpu/run.yaml \
--port 8321
@@ -136,11 +136,11 @@ docker run \

### Via venv

If you've set up your local development environment, you can also build the image using your local virtual environment.
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
@@ -169,7 +169,11 @@ docker run \
Ensure you have configured the starter distribution using the environment variables explained above.

```bash
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
uv run --with llama-stack llama stack run starter
```

## Example Usage
@@ -23,6 +23,17 @@ Another simple way to start interacting with Llama Stack is to just spin up a co
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.


## Configure logging

Control log output via environment variables before starting the server.

- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.

Export these variables prior to running `llama stack run`, launching a container, or starting the server through any other pathway, as shown below.
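For example, a minimal shell session (the `starter` distribution and the log path are only assumptions for illustration) might look like:

```bash
# Debug logs for the server component, info for core
export LLAMA_STACK_LOGGING="server=debug;core=info"
# Mirror all output to a file while still printing to stdout
export LLAMA_STACK_LOG_FILE=~/llama-stack-server.log

llama stack run starter
```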

```{toctree}
:maxdepth: 1
:hidden:
@@ -58,15 +58,19 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using the

<Tabs>
<TabItem value="venv" label="Using venv">
You can use Python to build and run the Llama Stack server, which is useful for testing and development.
You can use Python to install dependencies and run the Llama Stack server, which is useful for testing and development.

Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
Now let's build and run the Llama Stack config for Ollama.
Now let's install dependencies and run the Llama Stack config for Ollama.
We use `starter` as the template. By default all providers are disabled, so Ollama must be enabled by passing environment variables.

```bash
llama stack build --distro starter --image-type venv --run
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
llama stack run starter
```
</TabItem>
<TabItem value="container" label="Using a Container">
@@ -304,7 +308,7 @@ stream = agent.create_turn(
for event in AgentEventLogger().log(stream):
    event.print()
```
### ii. Run the Script
#### ii. Run the Script
Let's run the script using `uv`
```bash
uv run python agent.py
@@ -24,10 +24,13 @@ ollama run llama3.2:3b --keepalive 60m

#### Step 2: Run the Llama Stack server

We will use `uv` to run the Llama Stack server.
We will use `uv` to install dependencies and run the Llama Stack server.
```bash
OLLAMA_URL=http://localhost:11434 \
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
```
#### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
@@ -1,5 +1,7 @@
---
description: "Llama Stack Evaluation API for running evaluations on model and agent candidates."
description: "Evaluations

  Llama Stack Evaluation API for running evaluations on model and agent candidates."
sidebar_label: Eval
title: Eval
---
@@ -8,6 +10,8 @@ title: Eval

## Overview

Evaluations

Llama Stack Evaluation API for running evaluations on model and agent candidates.

This section contains documentation for all available providers for the **eval** API.
@@ -240,6 +240,6 @@ additional_pip_packages:
- sqlalchemy[asyncio]
```

No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.

The provider will now be available in Llama Stack with the type `remote::ramalama`.
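For instance, a minimal sketch of that flow (both config filenames, `my-ramalama-build.yaml` and `my-ramalama-run.yaml`, are only illustrative placeholders) might look like:

```bash
# Install the distribution's dependencies, including the external provider's packages
llama stack list-deps ./my-ramalama-build.yaml | xargs -L1 uv pip install

# Start the stack; the remote::ramalama provider is resolved via its `module`
llama stack run ./my-ramalama-run.yaml
```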
@ -123,7 +123,8 @@
|
|||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
|
||||
"!uv run --with llama-stack llama stack build --distro together\n",
|
||||
"!uv run --with llama-stack llama stack list-deps together | xargs -L1 uv pip install\n",
|
||||
"!uv run --with llama-stack llama stack run together\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
|
|
|
|||
|
|
@ -233,7 +233,8 @@
|
|||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server\n",
|
||||
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
|
||||
"!uv run --with llama-stack llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install\n",
|
||||
"!uv run --with llama-stack llama stack run meta-reference-gpu\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
|
|
|
|||
|
|
@ -223,7 +223,8 @@
|
|||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server\n",
|
||||
"!uv run --with llama-stack llama stack build --distro llama_api\n",
|
||||
"!uv run --with llama-stack llama stack list-deps llama_api | xargs -L1 uv pip install\n",
|
||||
"!uv run --with llama-stack llama stack run llama_api\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
|
|
|
|||
|
|
@ -2864,7 +2864,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
|
||||
"!llama stack list-deps experimental-post-training | xargs -L1 uv pip install"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"!pip install -U llama-stack\n",
|
||||
"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
|
||||
"llama stack list-deps fireworks | xargs -L1 uv pip install\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
|
||||
"!uv run llama stack list-deps together | xargs -L1 uv pip install\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -136,7 +136,8 @@
|
|||
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
|
||||
" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
|
||||
" \"uv run --with llama-stack llama stack run starter\",\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
|
|
@ -172,7 +173,7 @@
|
|||
"\n",
|
||||
"def kill_llama_stack_server():\n",
|
||||
" # Kill any existing llama stack server processes using pkill command\n",
|
||||
" os.system(\"pkill -f llama_stack.core.server.server\")"
|
||||
" os.system(\"pkill -f llama_stack.core.server.server\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -105,7 +105,8 @@
|
|||
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
|
||||
" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
|
||||
" \"uv run --with llama-stack llama stack run starter\",\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"```bash\n",
|
||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"```bash\n",
|
||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@
|
|||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
||||
"!uv run --with llama-stack llama stack build --distro starter\n",
|
||||
"!uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
|
|
|
|||
|
|
@ -47,11 +47,11 @@ function QuickStart() {
|
|||
<pre><code>{`# Install uv and start Ollama
|
||||
ollama run llama3.2:3b --keepalive 60m
|
||||
|
||||
# Install server dependencies
|
||||
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||
|
||||
# Run Llama Stack server
|
||||
OLLAMA_URL=http://localhost:11434 \\
|
||||
uv run --with llama-stack \\
|
||||
llama stack build --distro starter \\
|
||||
--image-type venv --run
|
||||
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
|
||||
|
||||
# Try the Python SDK
|
||||
from llama_stack_client import LlamaStackClient
|
||||
|
|
|
|||
4
docs/static/deprecated-llama-stack-spec.html
vendored
@ -13449,8 +13449,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
4
docs/static/deprecated-llama-stack-spec.yaml
vendored
@ -10196,9 +10196,9 @@ tags:
|
|||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
|
|
@ -5518,8 +5518,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining (Coming Soon)",
|
||||
|
|
|
|||
|
|
@ -4119,9 +4119,9 @@ tags:
|
|||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
x-tagGroups:
|
||||
|
|
|
|||
34
docs/static/llama-stack-spec.html
vendored
@ -282,7 +282,7 @@
|
|||
"Conversations"
|
||||
],
|
||||
"summary": "Create a conversation.",
|
||||
"description": "Create a conversation.",
|
||||
"description": "Create a conversation.\nCreate a conversation.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -326,8 +326,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Get a conversation with the given ID.",
|
||||
"description": "Get a conversation with the given ID.",
|
||||
"summary": "Retrieve a conversation.",
|
||||
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -369,8 +369,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Update a conversation's metadata with the given ID.",
|
||||
"description": "Update a conversation's metadata with the given ID.",
|
||||
"summary": "Update a conversation.",
|
||||
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -422,8 +422,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation with the given ID.",
|
||||
"description": "Delete a conversation with the given ID.",
|
||||
"summary": "Delete a conversation.",
|
||||
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -467,8 +467,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "List items in the conversation.",
|
||||
"description": "List items in the conversation.",
|
||||
"summary": "List items.",
|
||||
"description": "List items.\nList items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -597,8 +597,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Create items in the conversation.",
|
||||
"description": "Create items in the conversation.",
|
||||
"summary": "Create items.",
|
||||
"description": "Create items.\nCreate items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -652,8 +652,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Retrieve a conversation item.",
|
||||
"description": "Retrieve a conversation item.",
|
||||
"summary": "Retrieve an item.",
|
||||
"description": "Retrieve an item.\nRetrieve a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -704,8 +704,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation item.",
|
||||
"description": "Delete a conversation item.",
|
||||
"summary": "Delete an item.",
|
||||
"description": "Delete an item.\nDelete a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -13251,8 +13251,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Conversations",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for conversation management operations."
|
||||
"description": "Protocol for conversation management operations.",
|
||||
"x-displayName": "Conversations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
56
docs/static/llama-stack-spec.yaml
vendored
@ -192,7 +192,10 @@ paths:
|
|||
tags:
|
||||
- Conversations
|
||||
summary: Create a conversation.
|
||||
description: Create a conversation.
|
||||
description: >-
|
||||
Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -222,8 +225,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Get a conversation with the given ID.
|
||||
description: Get a conversation with the given ID.
|
||||
summary: Retrieve a conversation.
|
||||
description: >-
|
||||
Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -252,9 +258,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: >-
|
||||
Update a conversation's metadata with the given ID.
|
||||
summary: Update a conversation.
|
||||
description: >-
|
||||
Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
|
|
@ -290,8 +297,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation with the given ID.
|
||||
description: Delete a conversation with the given ID.
|
||||
summary: Delete a conversation.
|
||||
description: >-
|
||||
Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -321,8 +331,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: List items in the conversation.
|
||||
description: List items in the conversation.
|
||||
summary: List items.
|
||||
description: >-
|
||||
List items.
|
||||
|
||||
List items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -495,8 +508,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Create items in the conversation.
|
||||
description: Create items in the conversation.
|
||||
summary: Create items.
|
||||
description: >-
|
||||
Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -532,8 +548,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Retrieve a conversation item.
|
||||
description: Retrieve a conversation item.
|
||||
summary: Retrieve an item.
|
||||
description: >-
|
||||
Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -568,8 +587,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation item.
|
||||
description: Delete a conversation item.
|
||||
summary: Delete an item.
|
||||
description: >-
|
||||
Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -10146,9 +10168,9 @@ tags:
|
|||
- `background`
|
||||
x-displayName: Agents
|
||||
- name: Conversations
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for conversation management operations.
|
||||
x-displayName: Conversations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
38
docs/static/stainless-llama-stack-spec.html
vendored
@ -282,7 +282,7 @@
|
|||
"Conversations"
|
||||
],
|
||||
"summary": "Create a conversation.",
|
||||
"description": "Create a conversation.",
|
||||
"description": "Create a conversation.\nCreate a conversation.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -326,8 +326,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Get a conversation with the given ID.",
|
||||
"description": "Get a conversation with the given ID.",
|
||||
"summary": "Retrieve a conversation.",
|
||||
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -369,8 +369,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Update a conversation's metadata with the given ID.",
|
||||
"description": "Update a conversation's metadata with the given ID.",
|
||||
"summary": "Update a conversation.",
|
||||
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -422,8 +422,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation with the given ID.",
|
||||
"description": "Delete a conversation with the given ID.",
|
||||
"summary": "Delete a conversation.",
|
||||
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -467,8 +467,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "List items in the conversation.",
|
||||
"description": "List items in the conversation.",
|
||||
"summary": "List items.",
|
||||
"description": "List items.\nList items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -597,8 +597,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Create items in the conversation.",
|
||||
"description": "Create items in the conversation.",
|
||||
"summary": "Create items.",
|
||||
"description": "Create items.\nCreate items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -652,8 +652,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Retrieve a conversation item.",
|
||||
"description": "Retrieve a conversation item.",
|
||||
"summary": "Retrieve an item.",
|
||||
"description": "Retrieve an item.\nRetrieve a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -704,8 +704,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation item.",
|
||||
"description": "Delete a conversation item.",
|
||||
"summary": "Delete an item.",
|
||||
"description": "Delete an item.\nDelete a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -17928,8 +17928,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Conversations",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for conversation management operations."
|
||||
"description": "Protocol for conversation management operations.",
|
||||
"x-displayName": "Conversations"
|
||||
},
|
||||
{
|
||||
"name": "DatasetIO",
|
||||
|
|
@ -17941,8 +17941,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
60
docs/static/stainless-llama-stack-spec.yaml
vendored
@ -195,7 +195,10 @@ paths:
|
|||
tags:
|
||||
- Conversations
|
||||
summary: Create a conversation.
|
||||
description: Create a conversation.
|
||||
description: >-
|
||||
Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -225,8 +228,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Get a conversation with the given ID.
|
||||
description: Get a conversation with the given ID.
|
||||
summary: Retrieve a conversation.
|
||||
description: >-
|
||||
Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -255,9 +261,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: >-
|
||||
Update a conversation's metadata with the given ID.
|
||||
summary: Update a conversation.
|
||||
description: >-
|
||||
Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
|
|
@ -293,8 +300,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation with the given ID.
|
||||
description: Delete a conversation with the given ID.
|
||||
summary: Delete a conversation.
|
||||
description: >-
|
||||
Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -324,8 +334,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: List items in the conversation.
|
||||
description: List items in the conversation.
|
||||
summary: List items.
|
||||
description: >-
|
||||
List items.
|
||||
|
||||
List items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -498,8 +511,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Create items in the conversation.
|
||||
description: Create items in the conversation.
|
||||
summary: Create items.
|
||||
description: >-
|
||||
Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -535,8 +551,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Retrieve a conversation item.
|
||||
description: Retrieve a conversation item.
|
||||
summary: Retrieve an item.
|
||||
description: >-
|
||||
Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -571,8 +590,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation item.
|
||||
description: Delete a conversation item.
|
||||
summary: Delete an item.
|
||||
description: >-
|
||||
Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -13533,17 +13555,17 @@ tags:
|
|||
- name: Benchmarks
|
||||
description: ''
|
||||
- name: Conversations
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for conversation management operations.
|
||||
x-displayName: Conversations
|
||||
- name: DatasetIO
|
||||
description: ''
|
||||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
|
|
@@ -78,17 +78,14 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next

## Build, Configure, and Run Llama Stack

1. **Build the Llama Stack**:
   Build the Llama Stack using the `starter` template:
1. **Install dependencies**:
   ```bash
   uv run --with llama-stack llama stack build --distro starter --image-type venv
   llama stack list-deps starter | xargs -L1 uv pip install
   ```
   **Expected Output:**

2. **Start the distribution**:
   ```bash
   ...
   Build Successful!
   You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
   You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
   llama stack run starter
   ```

3. **Set the ENV variables by exporting them to the terminal**:
@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Conversations(Protocol):
|
||||
"""Protocol for conversation management operations."""
|
||||
"""Conversations
|
||||
|
||||
Protocol for conversation management operations."""
|
||||
|
||||
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_conversation(
|
||||
|
|
@ -181,6 +183,8 @@ class Conversations(Protocol):
|
|||
) -> Conversation:
|
||||
"""Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
|
||||
:param items: Initial items to include in the conversation context.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The created conversation object.
|
||||
|
|
@ -189,7 +193,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Get a conversation with the given ID.
|
||||
"""Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The conversation object.
|
||||
|
|
@ -198,7 +204,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation's metadata with the given ID.
|
||||
"""Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
|
|
@ -208,7 +216,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation with the given ID.
|
||||
"""Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The deleted conversation resource.
|
||||
|
|
@ -217,7 +227,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create items in the conversation.
|
||||
"""Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param items: Items to include in the conversation context.
|
||||
|
|
@ -227,7 +239,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve a conversation item.
|
||||
"""Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
|
|
@ -244,7 +258,9 @@ class Conversations(Protocol):
|
|||
limit: int | NotGiven = NOT_GIVEN,
|
||||
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
||||
) -> ConversationItemList:
|
||||
"""List items in the conversation.
|
||||
"""List items.
|
||||
|
||||
List items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param after: An item ID to list items after, used in pagination.
|
||||
|
|
@ -259,7 +275,9 @@ class Conversations(Protocol):
|
|||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete a conversation item.
|
||||
"""Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
|
|
|
|||
|
|
@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):
|
|||
|
||||
|
||||
class Eval(Protocol):
|
||||
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
"""Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
|
|
|
|||
182
llama_stack/cli/stack/_list_deps.py
Normal file
@ -0,0 +1,182 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.core.build import get_provider_dependencies
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
logger = get_logger(name=__name__, category="cli")
|
||||
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
SERVER_DEPENDENCIES = [
|
||||
"aiosqlite",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"uvicorn",
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
]
|
||||
|
||||
|
||||
def format_output_deps_only(
|
||||
normal_deps: list[str],
|
||||
special_deps: list[str],
|
||||
external_deps: list[str],
|
||||
uv: bool = False,
|
||||
) -> str:
|
||||
"""Format dependencies as a list."""
|
||||
lines = []
|
||||
|
||||
uv_str = ""
|
||||
if uv:
|
||||
uv_str = "uv pip install "
|
||||
|
||||
# Quote deps with commas
|
||||
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
|
||||
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
|
||||
|
||||
for special_dep in special_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
|
||||
|
||||
for external_dep in external_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||
if args.config:
|
||||
try:
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
||||
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
|
||||
except ValueError as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if config_file:
|
||||
with open(config_file) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
build_config.image_type = "venv"
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {config_file}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider_type} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
||||
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
|
||||
# Add external API dependencies
|
||||
if build_config.external_apis_dir:
|
||||
from llama_stack.core.external import load_external_apis
|
||||
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
for _, api_spec in external_apis.items():
|
||||
normal_deps.extend(api_spec.pip_packages)
|
||||
|
||||
# Format and output based on requested format
|
||||
output = format_output_deps_only(
|
||||
normal_deps=normal_deps,
|
||||
special_deps=special_deps,
|
||||
external_deps=external_provider_dependencies,
|
||||
uv=args.format == "uv",
|
||||
)
|
||||
|
||||
print(output)
|
||||
|
||||
|
||||
def quote_if_needed(dep):
|
||||
# Add quotes if the dependency contains special characters that need escaping in shell
|
||||
# This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
|
||||
needs_quoting = any(char in dep for char in [",", "<", ">", "="])
|
||||
return f"'{dep}'" if needs_quoting else dep
|
||||
|
||||
|
||||
def quote_special_dep(dep_string):
|
||||
"""
|
||||
Quote individual packages in a special dependency string.
|
||||
Special deps may contain multiple packages and flags like --extra-index-url.
|
||||
We need to quote only the package specs that contain special characters.
|
||||
"""
|
||||
parts = dep_string.split()
|
||||
quoted_parts = []
|
||||
|
||||
for part in parts:
|
||||
# Don't quote flags (they start with -)
|
||||
if part.startswith("-"):
|
||||
quoted_parts.append(part)
|
||||
else:
|
||||
# Quote package specs that need it
|
||||
quoted_parts.append(quote_if_needed(part))
|
||||
|
||||
return " ".join(quoted_parts)
|
||||
|
|
@ -8,6 +8,9 @@ import textwrap
|
|||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, category="cli")
|
||||
|
||||
|
||||
class StackBuild(Subcommand):
|
||||
|
|
@ -16,7 +19,7 @@ class StackBuild(Subcommand):
|
|||
self.parser = subparsers.add_parser(
|
||||
"build",
|
||||
prog="llama stack build",
|
||||
description="Build a Llama stack container",
|
||||
description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
|
|
@ -93,6 +96,9 @@ the build. If not specified, currently active environment will be used if found.
|
|||
)
|
||||
|
||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
logger.warning(
|
||||
"The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
|
||||
)
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._build import run_stack_build_command
|
||||
|
|
|
|||
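As a rough migration sketch (using `starter` purely as an example distribution), the deprecated invocation maps onto the new subcommand like this:

```bash
# Deprecated: previously built a venv/container for the distribution
llama stack build --distro starter --image-type venv

# Replacement: list the distribution's dependencies and install them yourself
llama stack list-deps starter | xargs -L1 uv pip install
```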
51
llama_stack/cli/stack/list_deps.py
Normal file
@ -0,0 +1,51 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackListDeps(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list-deps",
|
||||
prog="llama stack list-deps",
|
||||
description="list the dependencies for a llama stack distribution",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_list_deps_command)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"config",
|
||||
type=str,
|
||||
nargs="?", # Make it optional
|
||||
metavar="config | distro",
|
||||
help="Path to config file to use or name of known distro (llama stack list for a list).",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--format",
|
||||
type=str,
|
||||
choices=["uv", "deps-only"],
|
||||
default="deps-only",
|
||||
help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
|
||||
)
|
||||
|
||||
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._list_deps import run_stack_list_deps_command
|
||||
|
||||
return run_stack_list_deps_command(args)
|
||||
|
|
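A few illustrative invocations of the new subcommand, based on the arguments defined above (the provider pair is only an example and assumes that provider type is registered):

```bash
# Dependencies for a known distro, one requirement per line (default deps-only format)
llama stack list-deps starter

# Emit ready-to-run `uv pip install` commands instead
llama stack list-deps starter --format uv

# Resolve dependencies for an explicit set of providers instead of a distro
llama stack list-deps --providers inference=remote::ollama --format uv
```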
@ -13,6 +13,7 @@ from llama_stack.cli.subcommand import Subcommand
|
|||
|
||||
from .build import StackBuild
|
||||
from .list_apis import StackListApis
|
||||
from .list_deps import StackListDeps
|
||||
from .list_providers import StackListProviders
|
||||
from .remove import StackRemove
|
||||
from .run import StackRun
|
||||
|
|
@ -39,6 +40,7 @@ class StackParser(Subcommand):
|
|||
subparsers = self.parser.add_subparsers(title="stack_subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
StackListDeps.create(subparsers)
|
||||
StackBuild.create(subparsers)
|
||||
StackListApis.create(subparsers)
|
||||
StackListProviders.create(subparsers)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,28 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import sys
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
class ImageType(Enum):
|
||||
|
|
@ -19,3 +40,91 @@ def print_subcommand_description(parser, subparsers):
|
|||
description = subcommand.description
|
||||
description_text += f" {name:<21} {description}\n"
|
||||
parser.epilog = description_text
|
||||
|
||||
|
||||
def generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_templates_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
template_specs = {}
|
||||
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
||||
template_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
template_specs[template_name] = build_config
|
||||
return template_specs
|
||||
|
|
|
|||
|
|
@@ -338,7 +338,7 @@ fi
# Add other required commands generic to all containers
add_to_container << EOF

RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
EOF

printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import importlib
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
from typing import Any
|
||||
|
||||
|
|
|
|||
|
|
@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
|
|||
return TEST_CONTEXT.set(provider_data["__test_id"])
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_debug_mode() -> bool:
|
||||
"""Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var."""
|
||||
return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes")
|
||||
|
|
|
|||
|
|
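For example (assuming the recording-backed suite is driven through pytest), the flag can be enabled for a single run to surface the per-request hash logging added below:

```bash
# Any of "1", "true", or "yes" turns on debug output for the recording layer
LLAMA_STACK_TEST_DEBUG=1 pytest tests/integration
```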
@ -42,25 +42,25 @@ def resolve_config_or_distro(
|
|||
# Strategy 1: Try as file path first
|
||||
config_path = Path(config_or_distro)
|
||||
if config_path.exists() and config_path.is_file():
|
||||
logger.info(f"Using file path: {config_path}")
|
||||
logger.debug(f"Using file path: {config_path}")
|
||||
return config_path.resolve()
|
||||
|
||||
# Strategy 2: Try as distribution name (if no .yaml extension)
|
||||
if not config_or_distro.endswith(".yaml"):
|
||||
distro_config = _get_distro_config_path(config_or_distro, mode)
|
||||
if distro_config.exists():
|
||||
logger.info(f"Using distribution: {distro_config}")
|
||||
logger.debug(f"Using distribution: {distro_config}")
|
||||
return distro_config
|
||||
|
||||
# Strategy 3: Try as built distribution name
|
||||
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
|
||||
if distrib_config.exists():
|
||||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
logger.debug(f"Using built distribution: {distrib_config}")
|
||||
return distrib_config
|
||||
|
||||
distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
|
||||
if distrib_config.exists():
|
||||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
logger.debug(f"Using built distribution: {distrib_config}")
|
||||
return distrib_config
|
||||
|
||||
# Strategy 4: Failed - provide helpful error
|
||||
|
|
|
|||
|
|
@@ -70,10 +70,10 @@ docker run \

### Via venv

Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
Make sure you have the Llama Stack CLI available.

```bash
llama stack build --distro {{ name }} --image-type venv
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/{{ name }}/run.yaml \
--port 8321
@@ -126,11 +126,11 @@ docker run \

### Via venv

If you've set up your local development environment, you can also build the image using your local virtual environment.
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
@ -79,7 +79,6 @@ class TelemetryAdapter(Telemetry):
|
|||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
|
||||
self._lock = _global_lock
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ The following example shows how to create a chat completion for an NVIDIA NIM.
|
|||
|
||||
```python
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
|
|
@ -67,37 +67,40 @@ print(f"Response: {response.choices[0].message.content}")
|
|||
The following example shows how to do tool calling for an NVIDIA NIM.
|
||||
|
||||
```python
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
|
||||
|
||||
tool_definition = ToolDefinition(
|
||||
tool_name="get_weather",
|
||||
description="Get current weather information for a location",
|
||||
parameters={
|
||||
"location": ToolParamDefinition(
|
||||
param_type="string",
|
||||
description="The city and state, e.g. San Francisco, CA",
|
||||
required=True,
|
||||
),
|
||||
"unit": ToolParamDefinition(
|
||||
param_type="string",
|
||||
description="Temperature unit (celsius or fahrenheit)",
|
||||
required=False,
|
||||
default="celsius",
|
||||
),
|
||||
tool_definition = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get current weather information for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "Temperature unit (celsius or fahrenheit)",
|
||||
"default": "celsius",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
tool_response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
|
||||
tools=[tool_definition],
|
||||
)
|
||||
|
||||
print(f"Tool Response: {tool_response.choices[0].message.content}")
|
||||
print(f"Response content: {tool_response.choices[0].message.content}")
|
||||
if tool_response.choices[0].message.tool_calls:
|
||||
for tool_call in tool_response.choices[0].message.tool_calls:
|
||||
print(f"Tool Called: {tool_call.tool_name}")
|
||||
print(f"Arguments: {tool_call.arguments}")
|
||||
print(f"Tool Called: {tool_call.function.name}")
|
||||
print(f"Arguments: {tool_call.function.arguments}")
|
||||
```
|
||||
|
||||
### Structured Output Example
|
||||
|
|
@ -105,33 +108,26 @@ if tool_response.choices[0].message.tool_calls:
|
|||
The following example shows how to do structured output for an NVIDIA NIM.
|
||||
|
||||
```python
|
||||
from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
|
||||
|
||||
person_schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"age": {"type": "integer"},
|
||||
"age": {"type": "number"},
|
||||
"occupation": {"type": "string"},
|
||||
},
|
||||
"required": ["name", "age", "occupation"],
|
||||
}
|
||||
|
||||
response_format = JsonSchemaResponseFormat(
|
||||
type=ResponseFormatType.json_schema, json_schema=person_schema
|
||||
)
|
||||
|
||||
structured_response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. ",
|
||||
}
|
||||
],
|
||||
response_format=response_format,
|
||||
extra_body={"nvext": {"guided_json": person_schema}},
|
||||
)
|
||||
|
||||
print(f"Structured Response: {structured_response.choices[0].message.content}")
|
||||
```
|
||||
|
||||
|
|
@ -141,7 +137,7 @@ The following example shows how to create embeddings for an NVIDIA NIM.
|
|||
|
||||
```python
|
||||
response = client.embeddings.create(
|
||||
model="nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||
model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||
input=["What is the capital of France?"],
|
||||
extra_body={"input_type": "query"},
|
||||
)
|
||||
|
|
@ -163,15 +159,15 @@ image_path = {path_to_the_image}
|
|||
demo_image_b64 = load_image_as_base64(image_path)

vlm_response = client.chat.completions.create(
model="nvidia/vila",
model="nvidia/meta/llama-3.2-11b-vision-instruct",
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": demo_image_b64,
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{demo_image_b64}",
},
},
{
|
|
|
|||
|
|
@ -19,15 +19,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
|
|||

"""
NVIDIA Inference Adapter for Llama Stack.

Note: The inheritance order is important here. OpenAIMixin must come before
ModelRegistryHelper to ensure that OpenAIMixin.check_model_availability()
is used instead of ModelRegistryHelper.check_model_availability(). It also
must come before Inference to ensure that OpenAIMixin methods are available
in the Inference interface.

- OpenAIMixin.check_model_availability() queries the NVIDIA API to check if a model exists
- ModelRegistryHelper.check_model_availability() just returns False and shows a warning
"""

# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
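The inheritance-order note above is standard Python MRO behavior; a small standalone sketch (generic stand-in classes, not the actual llama-stack ones) illustrates why the leftmost base wins:

```python
class OpenAIMixinLike:
    def check_model_availability(self, model: str) -> bool:
        return True  # stands in for querying the NVIDIA API


class ModelRegistryHelperLike:
    def check_model_availability(self, model: str) -> bool:
        return False  # stands in for the warn-and-return-False fallback


class AdapterGood(OpenAIMixinLike, ModelRegistryHelperLike):
    pass


class AdapterBad(ModelRegistryHelperLike, OpenAIMixinLike):
    pass


# Python's MRO resolves the method from the leftmost base that defines it.
assert AdapterGood().check_model_availability("some-model") is True
assert AdapterBad().check_model_availability("some-model") is False
```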
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
|
|||
"__class__": class_name,
|
||||
"__method__": method_name,
|
||||
"__type__": span_type,
|
||||
"__args__": str(combined_args),
|
||||
"__args__": json.dumps(combined_args),
|
||||
}
|
||||
|
||||
return class_name, method_name, span_attributes
|
||||
|
|
@ -82,8 +82,8 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
|
|||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
||||
|
||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
||||
try:
|
||||
count = 0
|
||||
try:
|
||||
async for item in method(self, *args, **kwargs):
|
||||
yield item
|
||||
count += 1
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {}
|
|||
# Test context uses ContextVar since it changes per-test and needs async isolation
|
||||
from openai.types.completion_choice import CompletionChoice
|
||||
|
||||
from llama_stack.core.testing_context import get_test_context
|
||||
from llama_stack.core.testing_context import get_test_context, is_debug_mode
|
||||
|
||||
# update the "finish_reason" field, since its type definition is wrong (no None is accepted)
|
||||
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
|
||||
|
|
@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
|
|||
|
||||
body_for_hash = _normalize_body_for_hash(body)
|
||||
|
||||
test_id = get_test_context()
|
||||
normalized: dict[str, Any] = {
|
||||
"method": method.upper(),
|
||||
"endpoint": parsed.path,
|
||||
|
|
@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
|
|||
|
||||
# Include test_id for isolation, except for shared infrastructure endpoints
|
||||
if parsed.path not in ("/api/tags", "/v1/models"):
|
||||
normalized["test_id"] = get_test_context()
|
||||
normalized["test_id"] = test_id
|
||||
|
||||
normalized_json = json.dumps(normalized, sort_keys=True)
|
||||
return hashlib.sha256(normalized_json.encode()).hexdigest()
|
||||
request_hash = hashlib.sha256(normalized_json.encode()).hexdigest()
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Hash computation:")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Method: {method.upper()}")
|
||||
logger.info(f" Endpoint: {parsed.path}")
|
||||
logger.info(f" Model: {body.get('model', 'N/A')}")
|
||||
logger.info(f" Computed hash: {request_hash}")
|
||||
|
||||
return request_hash
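For reference, the hash returned above can be reproduced with plain `hashlib`/`json`. The sketch below mirrors only the fields visible in this hunk and is an approximation, not the real `normalize_inference_request` (which also normalizes the request body first):

```python
import hashlib
import json


def compute_request_hash(method: str, endpoint: str, body: dict, test_id: str | None) -> str:
    normalized = {
        "method": method.upper(),
        "endpoint": endpoint,
        "body": body,  # the real code hashes a normalized form of the body
    }
    # model-list endpoints are shared infrastructure, so test_id is omitted for them
    if endpoint not in ("/api/tags", "/v1/models"):
        normalized["test_id"] = test_id
    return hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()


print(compute_request_hash(
    "POST", "/v1/chat/completions", {"model": "llama3.2:3b"},
    "tests/integration/inference/test_text_inference.py::test_example",
))
```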
|
||||
|
||||
|
||||
def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str:
|
||||
|
|
@ -212,6 +223,11 @@ def patch_httpx_for_test_id():
|
|||
provider_data["__test_id"] = test_id
|
||||
request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Injected test ID into request header:")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" URL: {request.url}")
|
||||
|
||||
return None
|
||||
|
||||
LlamaStackClient._prepare_request = patched_prepare_request
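A condensed sketch of what the patched `_prepare_request` does to an outgoing request, based on the lines above (the header name and `__test_id` key come from this hunk; the `httpx` usage here is only for illustration):

```python
import json

import httpx


def inject_test_id(request: httpx.Request, test_id: str) -> None:
    # Merge the test id into the provider-data header, preserving existing data.
    raw = request.headers.get("X-LlamaStack-Provider-Data")
    provider_data = json.loads(raw) if raw else {}
    provider_data["__test_id"] = test_id
    request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)


req = httpx.Request("POST", "http://localhost:8321/v1/chat/completions")
inject_test_id(req, "tests/integration/agents/test_agents.py::test_custom_tool")
print(req.headers["X-LlamaStack-Provider-Data"])
```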
|
||||
|
|
@ -355,12 +371,35 @@ class ResponseStorage:
|
|||
test_file = test_id.split("::")[0] # Remove test function part
|
||||
test_dir = Path(test_file).parent # Get parent directory
|
||||
|
||||
# Put recordings in a "recordings" subdirectory of the test's parent dir
|
||||
# e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
|
||||
return test_dir / "recordings"
|
||||
if self.base_dir.is_absolute():
|
||||
repo_root = self.base_dir.parent.parent.parent
|
||||
result = repo_root / test_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Repo root: {repo_root}")
|
||||
logger.info(f" Test file: {test_file}")
|
||||
logger.info(f" Test dir: {test_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
|
||||
else:
|
||||
result = test_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Test dir: {test_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
|
||||
else:
|
||||
# Fallback for non-test contexts
|
||||
return self.base_dir / "recordings"
|
||||
result = self.base_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (no test context):")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
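Stripped of the debug logging, the path logic above reduces to: take the file part of the pytest node id and look for a `recordings/` directory next to it. A minimal sketch under that assumption:

```python
from pathlib import Path


def recordings_dir_for(test_id: str, repo_root: Path) -> Path:
    test_file = test_id.split("::")[0]  # e.g. "tests/integration/agents/test_agents.py"
    return repo_root / Path(test_file).parent / "recordings"


print(recordings_dir_for(
    "tests/integration/agents/test_agents.py::test_custom_tool",
    Path("/src/llama-stack"),
))
# -> /src/llama-stack/tests/integration/agents/recordings
```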
|
||||
|
||||
def _ensure_directory(self):
|
||||
"""Ensure test-specific directories exist."""
|
||||
|
|
@ -395,6 +434,13 @@ class ResponseStorage:
|
|||
|
||||
response_path = responses_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Storing recording:")
|
||||
logger.info(f" Request hash: {request_hash}")
|
||||
logger.info(f" File: {response_path}")
|
||||
logger.info(f" Test ID: {get_test_context()}")
|
||||
logger.info(f" Endpoint: {endpoint}")
|
||||
|
||||
# Save response to JSON file with metadata
|
||||
with open(response_path, "w") as f:
|
||||
json.dump(
|
||||
|
|
@ -423,16 +469,33 @@ class ResponseStorage:
|
|||
test_dir = self._get_test_dir()
|
||||
response_path = test_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Looking up recording:")
|
||||
logger.info(f" Request hash: {request_hash}")
|
||||
logger.info(f" Primary path: {response_path}")
|
||||
logger.info(f" Primary exists: {response_path.exists()}")
|
||||
|
||||
if response_path.exists():
|
||||
if is_debug_mode():
|
||||
logger.info(" Found in primary location")
|
||||
return _recording_from_file(response_path)
|
||||
|
||||
# Fallback to base recordings directory (for session-level recordings)
|
||||
fallback_dir = self.base_dir / "recordings"
|
||||
fallback_path = fallback_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info(f" Fallback path: {fallback_path}")
|
||||
logger.info(f" Fallback exists: {fallback_path.exists()}")
|
||||
|
||||
if fallback_path.exists():
|
||||
if is_debug_mode():
|
||||
logger.info(" Found in fallback location")
|
||||
return _recording_from_file(fallback_path)
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info(" Recording not found in either location")
|
||||
|
||||
return None
|
||||
|
||||
def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
|
||||
|
|
@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
mode = _current_mode
|
||||
storage = _current_storage
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Entering inference method:")
|
||||
logger.info(f" Mode: {mode}")
|
||||
logger.info(f" Client type: {client_type}")
|
||||
logger.info(f" Endpoint: {endpoint}")
|
||||
logger.info(f" Test context: {get_test_context()}")
|
||||
|
||||
if mode == APIRecordingMode.LIVE or storage is None:
|
||||
if endpoint == "/v1/models":
|
||||
return original_method(self, *args, **kwargs)
|
||||
|
|
@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
return response_body
|
||||
elif mode == APIRecordingMode.REPLAY:
|
||||
# REPLAY mode requires recording to exist
|
||||
if is_debug_mode():
|
||||
logger.error("[RECORDING DEBUG] Recording not found!")
|
||||
logger.error(f" Mode: {mode}")
|
||||
logger.error(f" Request hash: {request_hash}")
|
||||
logger.error(f" Method: {method}")
|
||||
logger.error(f" URL: {url}")
|
||||
logger.error(f" Endpoint: {endpoint}")
|
||||
logger.error(f" Model: {body.get('model', 'unknown')}")
|
||||
logger.error(f" Test context: {get_test_context()}")
|
||||
logger.error(
|
||||
f" Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}"
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Recording not found for request hash: {request_hash}\n"
|
||||
f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n"
|
||||
|
|
|
|||
2647
llama_stack/ui/package-lock.json
generated
2647
llama_stack/ui/package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -43,16 +43,16 @@
|
|||
"@testing-library/dom": "^10.4.1",
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^24",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "15.5.2",
|
||||
"eslint-config-next": "15.5.6",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
"eslint-plugin-prettier": "^5.5.4",
|
||||
"jest": "^29.7.0",
|
||||
"jest-environment-jsdom": "^30.1.2",
|
||||
"jest": "^30.2.0",
|
||||
"jest-environment-jsdom": "^30.2.0",
|
||||
"prettier": "3.6.2",
|
||||
"tailwindcss": "^4",
|
||||
"ts-node": "^10.9.2",
|
||||
|
|
|
|||
370
scripts/diagnose_recordings.py
Executable file
370
scripts/diagnose_recordings.py
Executable file
|
|
@ -0,0 +1,370 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Diagnostic tool for debugging test recording issues.
|
||||
|
||||
Usage:
|
||||
# Find where a hash would be looked up
|
||||
./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7
|
||||
|
||||
# Show what's in a recording file
|
||||
./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json
|
||||
|
||||
# List all recordings for a test
|
||||
./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool"
|
||||
|
||||
# Explain lookup paths for a test
|
||||
./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo"
|
||||
|
||||
# Compare request hash computation
|
||||
./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..."
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path to import from llama_stack
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
try:
|
||||
from llama_stack.testing.api_recorder import normalize_inference_request
|
||||
except ImportError:
|
||||
normalize_inference_request = None
|
||||
|
||||
|
||||
def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None):
|
||||
"""Find where a hash would be looked up and what exists"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
print(f"Searching for hash: {hash_value}\n")
|
||||
print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})")
|
||||
|
||||
# Compute test directory
|
||||
if test_id:
|
||||
test_file = test_id.split("::")[0]
|
||||
test_dir = Path(test_file).parent
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
print(f"Test ID: {test_id}")
|
||||
print(f"Test dir: {test_recordings_dir}\n")
|
||||
else:
|
||||
test_recordings_dir = base_dir / "recordings"
|
||||
print("No test ID provided, using base dir\n")
|
||||
|
||||
# Check primary location
|
||||
response_file = f"{hash_value}.json"
|
||||
response_path = test_recordings_dir / response_file
|
||||
|
||||
print("Checking primary location:")
|
||||
print(f" {response_path}")
|
||||
if response_path.exists():
|
||||
print(" EXISTS")
|
||||
print("\nFound! Contents:")
|
||||
show_recording(response_path)
|
||||
return True
|
||||
else:
|
||||
print(" Does not exist")
|
||||
|
||||
# Check fallback location
|
||||
fallback_dir = base_dir / "recordings"
|
||||
fallback_path = fallback_dir / response_file
|
||||
|
||||
print("\nChecking fallback location:")
|
||||
print(f" {fallback_path}")
|
||||
if fallback_path.exists():
|
||||
print(" EXISTS")
|
||||
print("\nFound in fallback! Contents:")
|
||||
show_recording(fallback_path)
|
||||
return True
|
||||
else:
|
||||
print(" Does not exist")
|
||||
|
||||
# Show what files DO exist
|
||||
print(f"\nFiles in test directory ({test_recordings_dir}):")
|
||||
if test_recordings_dir.exists():
|
||||
json_files = list(test_recordings_dir.glob("*.json"))
|
||||
if json_files:
|
||||
for f in json_files[:20]:
|
||||
print(f" - {f.name}")
|
||||
if len(json_files) > 20:
|
||||
print(f" ... and {len(json_files) - 20} more")
|
||||
else:
|
||||
print(" (empty)")
|
||||
else:
|
||||
print(" Directory does not exist")
|
||||
|
||||
print(f"\nFiles in fallback directory ({fallback_dir}):")
|
||||
if fallback_dir.exists():
|
||||
json_files = list(fallback_dir.glob("*.json"))
|
||||
if json_files:
|
||||
for f in json_files[:20]:
|
||||
print(f" - {f.name}")
|
||||
if len(json_files) > 20:
|
||||
print(f" ... and {len(json_files) - 20} more")
|
||||
else:
|
||||
print(" (empty)")
|
||||
else:
|
||||
print(" Directory does not exist")
|
||||
|
||||
# Try partial hash match
|
||||
print("\nLooking for partial matches (first 16 chars)...")
|
||||
partial = hash_value[:16]
|
||||
matches = []
|
||||
|
||||
for dir_to_search in [test_recordings_dir, fallback_dir]:
|
||||
if dir_to_search.exists():
|
||||
for f in dir_to_search.glob("*.json"):
|
||||
if f.stem.startswith(partial):
|
||||
matches.append(f)
|
||||
|
||||
if matches:
|
||||
print(f"Found {len(matches)} partial match(es):")
|
||||
for m in matches:
|
||||
print(f" {m}")
|
||||
else:
|
||||
print("No partial matches found")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def show_recording(file_path: Path):
|
||||
"""Show contents of a recording file"""
|
||||
if not file_path.exists():
|
||||
print(f"File does not exist: {file_path}")
|
||||
return
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
print(f"\nRecording: {file_path.name}\n")
|
||||
print(f"Test ID: {data.get('test_id', 'N/A')}")
|
||||
print("\nRequest:")
|
||||
req = data.get("request", {})
|
||||
print(f" Method: {req.get('method', 'N/A')}")
|
||||
print(f" URL: {req.get('url', 'N/A')}")
|
||||
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
|
||||
print(f" Model: {req.get('model', 'N/A')}")
|
||||
|
||||
body = req.get("body", {})
|
||||
if body:
|
||||
print("\nRequest Body:")
|
||||
print(f" Model: {body.get('model', 'N/A')}")
|
||||
print(f" Stream: {body.get('stream', 'N/A')}")
|
||||
if "messages" in body:
|
||||
print(f" Messages: {len(body['messages'])} message(s)")
|
||||
for i, msg in enumerate(body["messages"][:3]):
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, str):
|
||||
preview = content[:80] + "..." if len(content) > 80 else content
|
||||
else:
|
||||
preview = f"[{type(content).__name__}]"
|
||||
print(f" [{i}] {role}: {preview}")
|
||||
if "tools" in body:
|
||||
print(f" Tools: {len(body['tools'])} tool(s)")
|
||||
|
||||
response = data.get("response", {})
|
||||
if response:
|
||||
print("\nResponse:")
|
||||
print(f" Is streaming: {response.get('is_streaming', False)}")
|
||||
response_body = response.get("body", {})
|
||||
if isinstance(response_body, dict):
|
||||
if "__type__" in response_body:
|
||||
print(f" Type: {response_body['__type__']}")
|
||||
if "__data__" in response_body:
|
||||
response_data = response_body["__data__"]
|
||||
if "choices" in response_data:
|
||||
print(f" Choices: {len(response_data['choices'])}")
|
||||
if "usage" in response_data:
|
||||
usage = response_data["usage"]
|
||||
print(f" Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}")
|
||||
|
||||
|
||||
def list_test_recordings(test_id: str, base_dir: Path | None = None):
|
||||
"""List all recordings for a specific test"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
test_file = test_id.split("::")[0]
|
||||
test_dir = Path(test_file).parent
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
|
||||
print(f"Recordings for test: {test_id}\n")
|
||||
print(f"Directory: {test_recordings_dir}\n")
|
||||
|
||||
if not test_recordings_dir.exists():
|
||||
print("Directory does not exist")
|
||||
return
|
||||
|
||||
# Find all recordings for this specific test
|
||||
recordings = []
|
||||
for f in test_recordings_dir.glob("*.json"):
|
||||
try:
|
||||
with open(f) as fp:
|
||||
data = json.load(fp)
|
||||
if data.get("test_id") == test_id:
|
||||
recordings.append((f, data))
|
||||
except Exception as e:
|
||||
print(f"Could not read {f.name}: {e}")
|
||||
|
||||
if not recordings:
|
||||
print("No recordings found for this exact test ID")
|
||||
print("\nAll files in directory:")
|
||||
for f in test_recordings_dir.glob("*.json"):
|
||||
print(f" - {f.name}")
|
||||
return
|
||||
|
||||
print(f"Found {len(recordings)} recording(s):\n")
|
||||
for f, data in recordings:
|
||||
req = data.get("request", {})
|
||||
print(f" {f.name}")
|
||||
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
|
||||
print(f" Model: {req.get('model', 'N/A')}")
|
||||
print("")
|
||||
|
||||
|
||||
def explain_paths(test_id: str | None = None, base_dir: Path | None = None):
|
||||
"""Explain where recordings would be searched"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
print("Recording Lookup Path Explanation\n")
|
||||
print(f"Base directory: {base_dir}")
|
||||
print(f" Absolute: {base_dir.is_absolute()}")
|
||||
print("")
|
||||
|
||||
if test_id:
|
||||
print(f"Test ID: {test_id}")
|
||||
test_file = test_id.split("::")[0]
|
||||
print(f" Test file: {test_file}")
|
||||
|
||||
test_dir = Path(test_file).parent
|
||||
print(f" Test dir (relative): {test_dir}")
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
print(f" Repo root: {repo_root}")
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
print(f" Test recordings dir (absolute): {test_recordings_dir}")
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
print(f" Test recordings dir (relative): {test_recordings_dir}")
|
||||
|
||||
print("\nLookup order for recordings:")
|
||||
print(f" 1. Test-specific: {test_recordings_dir}/<hash>.json")
|
||||
print(f" 2. Fallback: {base_dir}/recordings/<hash>.json")
|
||||
|
||||
else:
|
||||
print("No test ID provided")
|
||||
print("\nLookup location:")
|
||||
print(f" {base_dir}/recordings/<hash>.json")
|
||||
|
||||
|
||||
def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None):
|
||||
"""Compute hash for a request"""
|
||||
if normalize_inference_request is None:
|
||||
print("Could not import normalize_inference_request from llama_stack.testing.api_recorder")
|
||||
print("Make sure you're running from the repo root with proper PYTHONPATH")
|
||||
return
|
||||
|
||||
try:
|
||||
body = json.loads(body_json)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Invalid JSON in body: {e}")
|
||||
return
|
||||
|
||||
# Create a fake URL with the endpoint
|
||||
url = f"http://example.com{endpoint}"
|
||||
|
||||
# Set test context if provided
|
||||
if test_id:
|
||||
from llama_stack.core.testing_context import set_test_context
|
||||
|
||||
set_test_context(test_id)
|
||||
|
||||
hash_result = normalize_inference_request(method, url, {}, body)
|
||||
|
||||
print("Hash Computation\n")
|
||||
print(f"Method: {method}")
|
||||
print(f"Endpoint: {endpoint}")
|
||||
print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}")
|
||||
print("\nBody:")
|
||||
print(json.dumps(body, indent=2))
|
||||
print(f"\nComputed Hash: {hash_result}")
|
||||
print(f"\nLooking for file: {hash_result}.json")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Diagnostic tool for test recording issues",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog=__doc__,
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(dest="command", help="Command to run")
|
||||
|
||||
# find-hash command
|
||||
find_parser = subparsers.add_parser("find-hash", help="Find where a hash would be looked up")
|
||||
find_parser.add_argument("hash", help="Hash value to search for (full or partial)")
|
||||
find_parser.add_argument("--test-id", help="Test ID to determine search paths")
|
||||
find_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# show command
|
||||
show_parser = subparsers.add_parser("show", help="Show contents of a recording file")
|
||||
show_parser.add_argument("file", type=Path, help="Path to recording JSON file")
|
||||
|
||||
# list-test command
|
||||
list_parser = subparsers.add_parser("list-test", help="List all recordings for a test")
|
||||
list_parser.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)")
|
||||
list_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# explain-paths command
|
||||
explain_parser = subparsers.add_parser("explain-paths", help="Explain where recordings are searched")
|
||||
explain_parser.add_argument("--test-id", help="Test ID to show paths for")
|
||||
explain_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# compute-hash command
|
||||
hash_parser = subparsers.add_parser("compute-hash", help="Compute hash for a request")
|
||||
hash_parser.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)")
|
||||
hash_parser.add_argument("--method", default="POST", help="HTTP method (default: POST)")
|
||||
hash_parser.add_argument("--body", required=True, help="Request body as JSON string")
|
||||
hash_parser.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
if args.command == "find-hash":
|
||||
find_hash(args.hash, args.base_dir, args.test_id)
|
||||
elif args.command == "show":
|
||||
show_recording(args.file)
|
||||
elif args.command == "list-test":
|
||||
list_test_recordings(args.test_id, args.base_dir)
|
||||
elif args.command == "explain-paths":
|
||||
explain_paths(args.test_id, args.base_dir)
|
||||
elif args.command == "compute-hash":
|
||||
compute_hash(args.endpoint, args.method, args.body, args.test_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
358
scripts/docker.sh
Executable file
358
scripts/docker.sh
Executable file
|
|
@ -0,0 +1,358 @@
|
|||
#!/bin/bash
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Docker container management script for Llama Stack
|
||||
# Allows starting/stopping/restarting a Llama Stack docker container for testing
|
||||
|
||||
# Default values
|
||||
DISTRO=""
|
||||
PORT=8321
|
||||
INFERENCE_MODE="replay"
|
||||
COMMAND=""
|
||||
USE_COPY_NOT_MOUNT=false
|
||||
NO_REBUILD=false
|
||||
|
||||
# Function to display usage
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage: $0 COMMAND [OPTIONS]
|
||||
|
||||
Commands:
|
||||
start Build and start the docker container
|
||||
stop Stop and remove the docker container
|
||||
restart Restart the docker container
|
||||
status Check if the container is running
|
||||
logs Show container logs (add -f to follow)
|
||||
|
||||
Options:
|
||||
--distro STRING Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
|
||||
--port NUMBER Port to run on (default: 8321)
|
||||
--inference-mode STRING Inference mode: replay, record-if-missing or record (default: replay)
|
||||
--copy-source Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
|
||||
--no-rebuild Skip building the image, just start the container (default: false)
|
||||
--help Show this help message
|
||||
|
||||
Examples:
|
||||
# Start a docker container (local dev mode - mounts source, builds image)
|
||||
$0 start --distro ci-tests
|
||||
|
||||
# Start without rebuilding (uses existing image)
|
||||
$0 start --distro ci-tests --no-rebuild
|
||||
|
||||
# Start with source copied into image (like CI)
|
||||
$0 start --distro ci-tests --copy-source
|
||||
|
||||
# Start with custom port
|
||||
$0 start --distro starter --port 8080
|
||||
|
||||
# Check status
|
||||
$0 status --distro ci-tests
|
||||
|
||||
# View logs
|
||||
$0 logs --distro ci-tests
|
||||
|
||||
# Stop container
|
||||
$0 stop --distro ci-tests
|
||||
|
||||
# Restart container
|
||||
$0 restart --distro ci-tests
|
||||
|
||||
Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
|
||||
automatically copied into the image. Locally, source is mounted for live development
|
||||
unless --copy-source is specified.
|
||||
EOF
|
||||
}
|
||||
|
||||
# Parse command (first positional arg)
|
||||
if [[ $# -eq 0 ]]; then
|
||||
echo "Error: Command required"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
COMMAND="$1"
|
||||
shift
|
||||
|
||||
# Validate command
|
||||
case "$COMMAND" in
|
||||
start | stop | restart | status | logs) ;;
|
||||
--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Error: Unknown command: $COMMAND"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Parse options
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--distro)
|
||||
DISTRO="$2"
|
||||
shift 2
|
||||
;;
|
||||
--port)
|
||||
PORT="$2"
|
||||
shift 2
|
||||
;;
|
||||
--inference-mode)
|
||||
INFERENCE_MODE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--copy-source)
|
||||
USE_COPY_NOT_MOUNT=true
|
||||
shift
|
||||
;;
|
||||
--no-rebuild)
|
||||
NO_REBUILD=true
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Validate required parameters for commands that need them
|
||||
if [[ "$COMMAND" != "stop" && "$COMMAND" != "status" && "$COMMAND" != "logs" ]]; then
|
||||
if [[ -z "$DISTRO" ]]; then
|
||||
echo "Error: --distro is required for '$COMMAND' command"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# If distro not provided for stop/status/logs, try to infer from running containers
|
||||
if [[ -z "$DISTRO" && ("$COMMAND" == "stop" || "$COMMAND" == "status" || "$COMMAND" == "logs") ]]; then
|
||||
# Look for any llama-stack-test-* container
|
||||
RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
|
||||
if [[ -n "$RUNNING_CONTAINERS" ]]; then
|
||||
DISTRO=$(echo "$RUNNING_CONTAINERS" | sed 's/llama-stack-test-//')
|
||||
echo "Found running container for distro: $DISTRO"
|
||||
else
|
||||
echo "Error: --distro is required (no running containers found)"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove docker: prefix if present
|
||||
DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
|
||||
|
||||
CONTAINER_NAME="llama-stack-test-$DISTRO"
|
||||
|
||||
# Function to check if container is running
|
||||
is_container_running() {
|
||||
docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||
}
|
||||
|
||||
# Function to check if container exists (running or stopped)
|
||||
container_exists() {
|
||||
docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||
}
|
||||
|
||||
# Function to stop and remove container
|
||||
stop_container() {
|
||||
if container_exists; then
|
||||
echo "Stopping container: $CONTAINER_NAME"
|
||||
docker stop "$CONTAINER_NAME" 2>/dev/null || true
|
||||
echo "Removing container: $CONTAINER_NAME"
|
||||
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||
echo "✅ Container stopped and removed"
|
||||
else
|
||||
echo "⚠️ Container $CONTAINER_NAME does not exist"
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to build docker image
|
||||
build_image() {
|
||||
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||
# Get the repo root (parent of scripts directory)
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||
|
||||
# Determine whether to copy or mount source
|
||||
# Copy in CI or if explicitly requested, otherwise mount for live development
|
||||
BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
|
||||
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||
echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
|
||||
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||
else
|
||||
echo "Will mount source for live development"
|
||||
fi
|
||||
|
||||
if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Docker image built successfully"
|
||||
}
|
||||
|
||||
# Function to start container
|
||||
start_container() {
|
||||
# Check if already running
|
||||
if is_container_running; then
|
||||
echo "⚠️ Container $CONTAINER_NAME is already running"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Stop and remove if exists but not running
|
||||
if container_exists; then
|
||||
echo "Removing existing stopped container..."
|
||||
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Build the image unless --no-rebuild was specified
|
||||
if [[ "$NO_REBUILD" == "true" ]]; then
|
||||
echo "Skipping build (--no-rebuild specified)"
|
||||
# Check if image exists (with or without localhost/ prefix)
|
||||
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
|
||||
echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
|
||||
echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Found existing image for distribution-$DISTRO:dev"
|
||||
else
|
||||
build_image
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting Docker Container ==="
|
||||
|
||||
# Get the repo root for volume mount
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||
|
||||
# Determine the actual image name (may have localhost/ prefix)
|
||||
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||
if [[ -z "$IMAGE_NAME" ]]; then
|
||||
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||
exit 1
|
||||
fi
|
||||
echo "Using image: $IMAGE_NAME"
|
||||
|
||||
# Build environment variables for docker run
|
||||
DOCKER_ENV_VARS=""
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||
|
||||
# Set default OLLAMA_URL if not provided
|
||||
# On macOS/Windows, use host.docker.internal to reach host from container
|
||||
# On Linux with --network host, use localhost
|
||||
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
|
||||
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
|
||||
else
|
||||
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
|
||||
fi
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||
|
||||
# Pass through API keys if they exist
|
||||
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||
[ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR"
|
||||
|
||||
# Use --network host on Linux only (macOS doesn't support it properly)
|
||||
NETWORK_MODE=""
|
||||
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
|
||||
NETWORK_MODE="--network host"
|
||||
fi
|
||||
|
||||
docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
|
||||
-p $PORT:$PORT \
|
||||
$DOCKER_ENV_VARS \
|
||||
-v "$REPO_ROOT":/app/llama-stack-source \
|
||||
"$IMAGE_NAME" \
|
||||
--port $PORT
|
||||
|
||||
echo "Waiting for container to start..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||
echo "✅ Container started successfully"
|
||||
echo ""
|
||||
echo "=== Container Information ==="
|
||||
echo "Container name: $CONTAINER_NAME"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
echo "Health check: http://localhost:$PORT/v1/health"
|
||||
echo ""
|
||||
echo "To view logs: $0 logs --distro $DISTRO"
|
||||
echo "To stop: $0 stop --distro $DISTRO"
|
||||
return 0
|
||||
fi
|
||||
if [[ $i -eq 30 ]]; then
|
||||
echo "❌ Container failed to start within timeout"
|
||||
echo "Showing container logs:"
|
||||
docker logs "$CONTAINER_NAME"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
# Execute command
|
||||
case "$COMMAND" in
|
||||
start)
|
||||
start_container
|
||||
;;
|
||||
stop)
|
||||
stop_container
|
||||
;;
|
||||
restart)
|
||||
echo "Restarting container: $CONTAINER_NAME"
|
||||
stop_container
|
||||
echo ""
|
||||
start_container
|
||||
;;
|
||||
status)
|
||||
if is_container_running; then
|
||||
echo "✅ Container $CONTAINER_NAME is running"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
# Try to get the actual port from the container
|
||||
ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1)
|
||||
if [[ -n "$ACTUAL_PORT" ]]; then
|
||||
echo "Port: $ACTUAL_PORT"
|
||||
fi
|
||||
elif container_exists; then
|
||||
echo "⚠️ Container $CONTAINER_NAME exists but is not running"
|
||||
echo "Start it with: $0 start --distro $DISTRO"
|
||||
else
|
||||
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||
echo "Start it with: $0 start --distro $DISTRO"
|
||||
fi
|
||||
;;
|
||||
logs)
|
||||
if container_exists; then
|
||||
echo "=== Logs for $CONTAINER_NAME ==="
|
||||
# Check if -f flag was passed after 'logs' command
|
||||
if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
|
||||
docker logs --tail 100 --follow "$CONTAINER_NAME"
|
||||
else
|
||||
docker logs --tail 100 "$CONTAINER_NAME"
|
||||
fi
|
||||
else
|
||||
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
|
@ -5,10 +5,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
[ -z "$BASH_VERSION" ] && {
|
||||
echo "This script must be run with bash" >&2
|
||||
exit 1
|
||||
}
|
||||
[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
|
||||
if set -o | grep -Eq 'posix[[:space:]]+on'; then
|
||||
exec /usr/bin/env bash "$0" "$@"
|
||||
fi
|
||||
|
||||
set -Eeuo pipefail
|
||||
|
||||
|
|
@ -18,12 +18,110 @@ MODEL_ALIAS="llama3.2:3b"
|
|||
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
||||
WAIT_TIMEOUT=30
|
||||
TEMP_LOG=""
|
||||
WITH_TELEMETRY=true
|
||||
TELEMETRY_SERVICE_NAME="llama-stack"
|
||||
TELEMETRY_SINKS="otel_trace,otel_metric"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT="http://otel-collector:4318"
|
||||
TEMP_TELEMETRY_DIR=""
|
||||
|
||||
materialize_telemetry_configs() {
|
||||
local dest="$1"
|
||||
mkdir -p "$dest"
|
||||
local otel_cfg="${dest}/otel-collector-config.yaml"
|
||||
local prom_cfg="${dest}/prometheus.yml"
|
||||
local graf_cfg="${dest}/grafana-datasources.yaml"
|
||||
|
||||
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
|
||||
if [ -e "$asset" ]; then
|
||||
die "Telemetry asset ${asset} already exists; refusing to overwrite"
|
||||
fi
|
||||
done
|
||||
|
||||
cat <<'EOF' > "$otel_cfg"
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 1024
|
||||
|
||||
exporters:
|
||||
# Export traces to Jaeger
|
||||
otlp/jaeger:
|
||||
endpoint: jaeger:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
# Export metrics to Prometheus
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:9464
|
||||
namespace: llama_stack
|
||||
|
||||
# Debug exporter for troubleshooting
|
||||
debug:
|
||||
verbosity: detailed
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [otlp/jaeger, debug]
|
||||
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [prometheus, debug]
|
||||
EOF
|
||||
|
||||
cat <<'EOF' > "$prom_cfg"
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:9464']
|
||||
EOF
|
||||
|
||||
cat <<'EOF' > "$graf_cfg"
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
|
||||
- name: Jaeger
|
||||
type: jaeger
|
||||
access: proxy
|
||||
url: http://jaeger:16686
|
||||
editable: true
|
||||
EOF
|
||||
}
|
||||
|
||||
# Cleanup function to remove temporary files
|
||||
cleanup() {
|
||||
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
||||
rm -f "$TEMP_LOG"
|
||||
fi
|
||||
if [ -n "$TEMP_TELEMETRY_DIR" ] && [ -d "$TEMP_TELEMETRY_DIR" ]; then
|
||||
rm -rf "$TEMP_TELEMETRY_DIR"
|
||||
fi
|
||||
}
|
||||
|
||||
# Set up trap to clean up on exit, error, or interrupt
|
||||
|
|
@ -32,7 +130,7 @@ trap cleanup EXIT ERR INT TERM
|
|||
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
|
||||
die(){
|
||||
printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2
|
||||
printf "\e[1;31m🐛 Report an issue @ https://github.com/meta-llama/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
||||
printf "\e[1;31m🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
|
|
@ -89,6 +187,12 @@ Options:
|
|||
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
||||
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
||||
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
||||
--with-telemetry Provision Jaeger, OTEL Collector, Prometheus, and Grafana (default: enabled)
|
||||
--no-telemetry, --without-telemetry
|
||||
Skip provisioning the telemetry stack
|
||||
--telemetry-service NAME Service name reported to telemetry (default: ${TELEMETRY_SERVICE_NAME})
|
||||
--telemetry-sinks SINKS Comma-separated telemetry sinks (default: ${TELEMETRY_SINKS})
|
||||
--otel-endpoint URL OTLP endpoint provided to Llama Stack (default: ${OTEL_EXPORTER_OTLP_ENDPOINT})
|
||||
-h, --help Show this help message
|
||||
|
||||
For more information:
|
||||
|
|
@ -127,6 +231,26 @@ while [[ $# -gt 0 ]]; do
|
|||
WAIT_TIMEOUT="$2"
|
||||
shift 2
|
||||
;;
|
||||
--with-telemetry)
|
||||
WITH_TELEMETRY=true
|
||||
shift
|
||||
;;
|
||||
--no-telemetry|--without-telemetry)
|
||||
WITH_TELEMETRY=false
|
||||
shift
|
||||
;;
|
||||
--telemetry-service)
|
||||
TELEMETRY_SERVICE_NAME="$2"
|
||||
shift 2
|
||||
;;
|
||||
--telemetry-sinks)
|
||||
TELEMETRY_SINKS="$2"
|
||||
shift 2
|
||||
;;
|
||||
--otel-endpoint)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
die "Unknown option: $1"
|
||||
;;
|
||||
|
|
@ -171,7 +295,11 @@ if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then
|
|||
fi
|
||||
|
||||
# Clean up any leftovers from earlier runs
|
||||
for name in ollama-server llama-stack; do
|
||||
containers=(ollama-server llama-stack)
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
containers+=(jaeger otel-collector prometheus grafana)
|
||||
fi
|
||||
for name in "${containers[@]}"; do
|
||||
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
||||
if [ -n "$ids" ]; then
|
||||
log "⚠️ Found existing container(s) for '${name}', removing..."
|
||||
|
|
@ -191,6 +319,64 @@ if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then
|
|||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Telemetry Stack
|
||||
###############################################################################
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
TEMP_TELEMETRY_DIR="$(mktemp -d)"
|
||||
TELEMETRY_ASSETS_DIR="$TEMP_TELEMETRY_DIR"
|
||||
log "🧰 Materializing telemetry configs..."
|
||||
materialize_telemetry_configs "$TELEMETRY_ASSETS_DIR"
|
||||
|
||||
log "📡 Starting telemetry stack..."
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name jaeger \
|
||||
--network llama-net \
|
||||
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||
-p 16686:16686 \
|
||||
-p 14250:14250 \
|
||||
-p 9411:9411 \
|
||||
docker.io/jaegertracing/all-in-one:latest > /dev/null 2>&1; then
|
||||
die "Jaeger startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name otel-collector \
|
||||
--network llama-net \
|
||||
-p 4318:4318 \
|
||||
-p 4317:4317 \
|
||||
-p 9464:9464 \
|
||||
-p 13133:13133 \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
|
||||
docker.io/otel/opentelemetry-collector-contrib:latest \
|
||||
--config /etc/otel-collector-config.yaml > /dev/null 2>&1; then
|
||||
die "OpenTelemetry Collector startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name prometheus \
|
||||
--network llama-net \
|
||||
-p 9090:9090 \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
|
||||
docker.io/prom/prometheus:latest \
|
||||
--config.file=/etc/prometheus/prometheus.yml \
|
||||
--storage.tsdb.path=/prometheus \
|
||||
--web.console.libraries=/etc/prometheus/console_libraries \
|
||||
--web.console.templates=/etc/prometheus/consoles \
|
||||
--storage.tsdb.retention.time=200h \
|
||||
--web.enable-lifecycle > /dev/null 2>&1; then
|
||||
die "Prometheus startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name grafana \
|
||||
--network llama-net \
|
||||
-p 3000:3000 \
|
||||
-e GF_SECURITY_ADMIN_PASSWORD=admin \
|
||||
-e GF_USERS_ALLOW_SIGN_UP=false \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
|
||||
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
|
||||
die "Grafana startup failed"
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# 1. Ollama
|
||||
###############################################################################
|
||||
|
|
@ -218,9 +404,19 @@ fi
|
|||
###############################################################################
|
||||
# 2. Llama‑Stack
|
||||
###############################################################################
|
||||
server_env_opts=()
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
server_env_opts+=(
|
||||
-e TELEMETRY_SINKS="${TELEMETRY_SINKS}"
|
||||
-e OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||
-e OTEL_SERVICE_NAME="${TELEMETRY_SERVICE_NAME}"
|
||||
)
|
||||
fi
|
||||
|
||||
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
||||
--network llama-net \
|
||||
-p "${PORT}:${PORT}" \
|
||||
"${server_env_opts[@]}" \
|
||||
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
||||
"${SERVER_IMAGE}" --port "${PORT}")
|
||||
|
||||
|
|
@ -244,5 +440,12 @@ log "👉 API endpoint: http://localhost:${PORT}"
|
|||
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
||||
log "💻 To access the llama stack CLI, exec into the container:"
|
||||
log " $ENGINE exec -ti llama-stack bash"
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
log "📡 Telemetry dashboards:"
|
||||
log " Jaeger UI: http://localhost:16686"
|
||||
log " Prometheus UI: http://localhost:9090"
|
||||
log " Grafana UI: http://localhost:3000 (admin/admin)"
|
||||
log " OTEL Collector: http://localhost:4318"
|
||||
fi
|
||||
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
||||
log ""
|
||||
|
|
|
|||
|
|
@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
|
|||
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
|
||||
|
||||
Examples:
|
||||
# Basic inference tests with ollama
|
||||
# Basic inference tests with ollama (server mode)
|
||||
$0 --stack-config server:ci-tests --suite base --setup ollama
|
||||
|
||||
# Basic inference tests with docker
|
||||
$0 --stack-config docker:ci-tests --suite base --setup ollama
|
||||
|
||||
# Multiple test directories with vllm
|
||||
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
|
||||
|
||||
|
|
@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
|
|||
|
||||
# Determine stack config type for api_recorder test isolation
|
||||
if [[ "$COLLECT_ONLY" == false ]]; then
|
||||
if [[ "$STACK_CONFIG" == server:* ]]; then
|
||||
if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
|
||||
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
|
||||
echo "Setting stack config type: server"
|
||||
else
|
||||
|
|
@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
trap stop_server EXIT ERR INT TERM
|
||||
fi
|
||||
|
||||
# Start Docker Container if needed
|
||||
if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
||||
stop_container() {
|
||||
echo "Stopping Docker container..."
|
||||
container_name="llama-stack-test-$DISTRO"
|
||||
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
|
||||
echo "Stopping and removing container: $container_name"
|
||||
docker stop "$container_name" 2>/dev/null || true
|
||||
docker rm "$container_name" 2>/dev/null || true
|
||||
else
|
||||
echo "No container named $container_name found"
|
||||
fi
|
||||
echo "Docker container stopped"
|
||||
}
|
||||
|
||||
# Extract distribution name from docker:distro format
|
||||
DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||
# Set LLAMA_STACK_DIR to repo root
|
||||
# USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
|
||||
BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
|
||||
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||
echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
|
||||
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||
else
|
||||
echo "Local mode: will mount source for live development"
|
||||
fi
|
||||
|
||||
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting Docker Container ==="
|
||||
container_name="llama-stack-test-$DISTRO"
|
||||
|
||||
# Stop and remove existing container if it exists
|
||||
docker stop "$container_name" 2>/dev/null || true
|
||||
docker rm "$container_name" 2>/dev/null || true
|
||||
|
||||
# Build environment variables for docker run
|
||||
DOCKER_ENV_VARS=""
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||
|
||||
# Pass through API keys if they exist
|
||||
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||
|
||||
# Determine the actual image name (may have localhost/ prefix)
|
||||
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||
if [[ -z "$IMAGE_NAME" ]]; then
|
||||
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||
exit 1
|
||||
fi
|
||||
echo "Using image: $IMAGE_NAME"
|
||||
|
||||
docker run -d --network host --name "$container_name" \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
$DOCKER_ENV_VARS \
|
||||
-v $ROOT_DIR:/app/llama-stack-source \
|
||||
"$IMAGE_NAME" \
|
||||
--port $LLAMA_STACK_PORT
|
||||
|
||||
echo "Waiting for Docker container to start..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||
echo "✅ Docker container started successfully"
|
||||
break
|
||||
fi
|
||||
if [[ $i -eq 30 ]]; then
|
||||
echo "❌ Docker container failed to start"
|
||||
echo "Container logs:"
|
||||
docker logs "$container_name"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Update STACK_CONFIG to point to the running container
|
||||
STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
|
||||
|
||||
trap stop_container EXIT ERR INT TERM
|
||||
fi
|
||||
|
||||
# Run tests
|
||||
echo "=== Running Integration Tests ==="
|
||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||
|
|
|
|||
|
|
@ -70,10 +70,15 @@ class BatchHelper:
|
|||
):
|
||||
"""Wait for a batch to reach a terminal status.
|
||||
|
||||
Uses exponential backoff polling strategy for efficient waiting:
|
||||
- Starts with short intervals (0.1s) for fast batches (e.g., replay mode)
|
||||
- Doubles interval each iteration up to a maximum
|
||||
- Adapts automatically to both fast and slow batch processing
|
||||
|
||||
Args:
|
||||
batch_id: The batch ID to monitor
|
||||
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
|
||||
sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
|
||||
sleep_interval: If provided, uses fixed interval instead of exponential backoff
|
||||
expected_statuses: Set of expected terminal statuses (default: {"completed"})
|
||||
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
|
||||
|
||||
|
|
@ -84,10 +89,6 @@ class BatchHelper:
|
|||
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
|
||||
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
|
||||
"""
|
||||
if sleep_interval is None:
|
||||
# Default to 1/10th of max_wait_time, with min 1s and max 15s
|
||||
sleep_interval = max(1, min(15, max_wait_time // 10))
|
||||
|
||||
if expected_statuses is None:
|
||||
expected_statuses = {"completed"}
|
||||
|
||||
|
|
@ -95,6 +96,15 @@ class BatchHelper:
|
|||
unexpected_statuses = terminal_statuses - expected_statuses
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# Use exponential backoff if no explicit sleep_interval provided
|
||||
if sleep_interval is None:
|
||||
current_interval = 0.1 # Start with 100ms
|
||||
max_interval = 10.0 # Cap at 10 seconds
|
||||
else:
|
||||
current_interval = sleep_interval
|
||||
max_interval = sleep_interval
|
||||
|
||||
while time.time() - start_time < max_wait_time:
|
||||
current_batch = self.client.batches.retrieve(batch_id)
|
||||
|
||||
|
|
@ -107,7 +117,11 @@ class BatchHelper:
|
|||
else:
|
||||
pytest.fail(error_msg)
|
||||
|
||||
time.sleep(sleep_interval)
|
||||
time.sleep(current_interval)
|
||||
|
||||
# Exponential backoff: double the interval each time, up to max
|
||||
if sleep_interval is None:
|
||||
current_interval = min(current_interval * 2, max_interval)
|
||||
|
||||
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
|
||||
if timeout_action == "skip":
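The hunk is truncated here, but the exponential-backoff polling described in the docstring is easy to sketch in isolation (the names below are illustrative, not the real `BatchHelper` API):

```python
import time


def poll_with_backoff(check, max_wait_time: float = 60.0, start: float = 0.1, cap: float = 10.0):
    """Call `check()` until it returns a truthy value or the deadline passes,
    doubling the sleep interval each iteration (0.1s, 0.2s, 0.4s, ... capped at `cap`)."""
    deadline = time.time() + max_wait_time
    interval = start
    while time.time() < deadline:
        result = check()
        if result:
            return result
        time.sleep(interval)
        interval = min(interval * 2, cap)
    raise TimeoutError(f"condition not met within {max_wait_time} seconds")


# Example (hypothetical): wait until a batch reports a terminal status
# poll_with_backoff(lambda: client.batches.retrieve(batch_id).status == "completed")
```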
|
||||
|
|
|
|||
|
|
@ -0,0 +1,506 @@
|
|||
{
|
||||
"test_id": null,
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is 2 + 2?"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "The answer to the equation 2 + 2 is 4."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Tell me a short joke"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0,
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Why",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " did",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " scare",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "crow",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " win",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " an",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " award",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "?\n\n",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Because",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " he",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " was",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " outstanding",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " in",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " his",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " field",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,88 @@
|
|||
{
|
||||
"test_id": null,
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/models",
|
||||
"headers": {},
|
||||
"body": {},
|
||||
"endpoint": "/v1/models",
|
||||
"model": ""
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b-instruct-fp16",
|
||||
"created": 1760453641,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "qwen3:4b",
|
||||
"created": 1757615302,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-oss:latest",
|
||||
"created": 1756395223,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "nomic-embed-text:latest",
|
||||
"created": 1756318548,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b",
|
||||
"created": 1755191039,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "all-minilm:l6-v2",
|
||||
"created": 1753968177,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:1b",
|
||||
"created": 1746124735,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:latest",
|
||||
"created": 1746044170,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
|
|
@@ -42,7 +42,9 @@ def pytest_sessionstart(session):
# Set test stack config type for api_recorder test isolation
stack_config = session.config.getoption("--stack-config", default=None)
if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
if stack_config and (
stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
):
os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
logger.info(f"Test stack config type: server (stack_config={stack_config})")
else:

@@ -139,7 +141,9 @@ def pytest_addoption(parser):
a 'pointer' to the stack. this can be either be:
(a) a template name like `starter`, or
(b) a path to a run.yaml file, or
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
(d) a server config like `server:ci-tests`, or
(e) a docker config like `docker:ci-tests` (builds and runs container)
"""
),
)
95
tests/integration/telemetry/conftest.py
Normal file
@@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry test configuration using OpenTelemetry SDK exporters.

This conftest provides in-memory telemetry collection for library_client mode only.
Tests using these fixtures should skip in server mode since the in-memory collector
cannot access spans from a separate server process.
"""

from typing import Any

import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace
import pytest
from opentelemetry import metrics, trace
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client


class TestCollector:
    def __init__(self, span_exp, metric_read):
        assert span_exp and metric_read
        self.span_exporter = span_exp
        self.metric_reader = metric_read

    def get_spans(self) -> tuple[ReadableSpan, ...]:
        return self.span_exporter.get_finished_spans()

    def get_metrics(self) -> Any | None:
        metrics = self.metric_reader.get_metrics_data()
        if metrics and metrics.resource_metrics:
            return metrics.resource_metrics[0].scope_metrics[0].metrics
        return None

    def clear(self) -> None:
        self.span_exporter.clear()
        self.metric_reader.get_metrics_data()


@pytest.fixture(scope="session")
def _telemetry_providers():
    """Set up in-memory OTEL providers before llama_stack_client initializes."""
    # Reset set-once flags to allow re-initialization
    if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
        otel_trace._TRACER_PROVIDER_SET_ONCE._done = False  # type: ignore
    if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
        otel_metrics._METER_PROVIDER_SET_ONCE._done = False  # type: ignore

    # Create in-memory exporters/readers
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    # Set module-level provider so TelemetryAdapter uses our in-memory providers
    telemetry_module._TRACER_PROVIDER = tracer_provider

    yield (span_exporter, metric_reader, tracer_provider, meter_provider)

    telemetry_module._TRACER_PROVIDER = None
    tracer_provider.shutdown()
    meter_provider.shutdown()


@pytest.fixture(scope="session")
def llama_stack_client(_telemetry_providers, request):
    """Override llama_stack_client to ensure in-memory telemetry providers are used."""
    patch_httpx_for_test_id()
    client = instantiate_llama_stack_client(request.session)

    return client


@pytest.fixture
def mock_otlp_collector(_telemetry_providers):
    """Provides access to telemetry data and clears between tests."""
    span_exporter, metric_reader, _, _ = _telemetry_providers
    collector = TestCollector(span_exporter, metric_reader)
    yield collector
    collector.clear()
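Aside (not part of the commit): the fixtures above build on the OpenTelemetry SDK's in-memory exporter. Stripped of the pytest plumbing, the capture pattern they rely on looks roughly like this self-contained sketch; the span and tracer names are arbitrary.

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

span_exporter = InMemorySpanExporter()
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
trace.set_tracer_provider(tracer_provider)  # global provider; settable only once per process

tracer = trace.get_tracer("telemetry-sketch")
with tracer.start_as_current_span("demo-span"):
    pass  # spans finished inside this block are exported synchronously into memory

finished = span_exporter.get_finished_spans()
assert finished and finished[0].name == "demo-span"
span_exporter.clear()  # reset between tests, as the mock_otlp_collector fixture does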
@@ -0,0 +1,57 @@
|
|||
{
|
||||
"test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test OpenAI telemetry creation"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-0de60cd6a6ec",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n # Create a new user entity\n user_entity = {\n 'id': 'user-123',\n 'name': 'John Doe',\n 'email': 'john.doe@example.com'\n }\n \n # Send the user creation request to OpenAI\n response = api.users.create(user_entity)\n print(f\"User created: {response}\")\n\ndef create_transaction():\n # Create a new transaction entity\n transaction_entity = {\n 'id': 'tran-123',\n 'user_id': 'user-123',\n 'transaction_type': 'query'\n }\n \n # Send the transaction creation request to OpenAI\n response = api.transactions.create(transaction_entity)\n print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n # Create a new telemetry event entity\n telemetry_event_entity = {\n 'id': 'telem-123',\n 'transaction_id': 'tran-123',\n 'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n }\n \n # Send the telemetry data to OpenAI\n response = api.telemetry.create(telemetry_event_entity)\n print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 460,
|
||||
"prompt_tokens": 30,
|
||||
"total_tokens": 490,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,59 @@
|
|||
{
|
||||
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace openai with temperature 0.7"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100,
|
||||
"stream": false,
|
||||
"temperature": 0.7
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-1fcfd86d8111",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 100,
|
||||
"prompt_tokens": 35,
|
||||
"total_tokens": 135,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
4211
tests/integration/telemetry/recordings/d45c9a9229e7e3f50a6eac139508babe21988649eb321b562f74061f58593c25.json
generated
Normal file
File diff suppressed because it is too large
Load diff
4263
tests/integration/telemetry/recordings/db8ffad4840512348c215005128557807ffbed0cf6bf11a52c1d1009878886ef.json
generated
Normal file
File diff suppressed because it is too large
Load diff
|
|
@@ -0,0 +1,59 @@
|
|||
{
|
||||
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://localhost:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace openai with temperature 0.7"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100,
|
||||
"stream": false,
|
||||
"temperature": 0.7
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-dba5042d6691",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 100,
|
||||
"prompt_tokens": 35,
|
||||
"total_tokens": 135,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
112
tests/integration/telemetry/test_completions.py
Normal file
@@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""

import json
import os

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
    reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)


def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
    """Verify streaming adds chunk_count and __type__=async_generator."""

    stream = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai 1"}],
        stream=True,
    )

    chunks = list(stream)
    assert len(chunks) > 0

    spans = mock_otlp_collector.get_spans()
    assert len(spans) > 0

    chunk_count = None
    for span in spans:
        if span.attributes.get("__type__") == "async_generator":
            chunk_count = span.attributes.get("chunk_count")
            if chunk_count:
                chunk_count = int(chunk_count)
            break

    assert chunk_count is not None
    assert chunk_count == len(chunks)


def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
    """Comprehensive validation of telemetry data format including spans and metrics."""
    response = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
        temperature=0.7,
        max_tokens=100,
        stream=False,
    )

    # Handle both dict and Pydantic model for usage
    # This occurs due to the replay system returning a dict for usage, but the client returning a Pydantic model
    # TODO: Fix this by making the replay system return a Pydantic model for usage
    usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
    assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
    assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
    assert usage.get("total_tokens") and usage["total_tokens"] > 0

    # Verify spans
    spans = mock_otlp_collector.get_spans()
    assert len(spans) == 5

    # we only need this captured one time
    logged_model_id = None

    for span in spans:
        attrs = span.attributes
        assert attrs is not None

        # Root span is created manually by tracing middleware, not by @trace_protocol decorator
        is_root_span = attrs.get("__root__") is True

        if is_root_span:
            # Root spans have different attributes
            assert attrs.get("__location__") in ["library_client", "server"]
        else:
            # Non-root spans are created by @trace_protocol decorator
            assert attrs.get("__autotraced__")
            assert attrs.get("__class__") and attrs.get("__method__")
            assert attrs.get("__type__") in ["async", "sync", "async_generator"]

            args = json.loads(attrs["__args__"])
            if "model_id" in args:
                logged_model_id = args["model_id"]

    assert logged_model_id is not None
    assert logged_model_id == text_model_id

    # TODO: re-enable this once metrics get fixed
    """
    # Verify token usage metrics in response
    metrics = mock_otlp_collector.get_metrics()

    assert metrics
    for metric in metrics:
        assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
        assert metric.unit == "tokens"
        assert metric.data.data_points and len(metric.data.data_points) == 1
        match metric.name:
            case "completion_tokens":
                assert metric.data.data_points[0].value == usage["completion_tokens"]
            case "total_tokens":
                assert metric.data.data_points[0].value == usage["total_tokens"]
            case "prompt_tokens":
                assert metric.data.data_points[0].value == usage["prompt_tokens"]
    """
50
tests/unit/distribution/test_stack_list_deps.py
Normal file
@@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
from io import StringIO
from unittest.mock import patch

from llama_stack.cli.stack._list_deps import (
    run_stack_list_deps_command,
)


def test_stack_list_deps_basic():
    args = argparse.Namespace(
        config=None,
        env_name="test-env",
        providers="inference=remote::ollama",
        format="deps-only",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    # deps-only format should NOT include "uv pip install" or "Dependencies for"
    assert "uv pip install" not in output
    assert "Dependencies for" not in output

    # Check that expected dependencies are present
    assert "ollama" in output
    assert "aiohttp" in output
    assert "fastapi" in output


def test_stack_list_deps_with_distro_uv():
    args = argparse.Namespace(
        config="starter",
        env_name=None,
        providers=None,
        format="uv",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    assert "uv pip install" in output
38
uv.lock
generated
@@ -4129,27 +4129,27 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.12.5"
|
||||
version = "0.9.10"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/30/cd/01015eb5034605fd98d829c5839ec2c6b4582b479707f7c1c2af861e8258/ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e", size = 5170722, upload-time = "2025-07-24T13:26:37.456Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/de/ad2f68f0798ff15dd8c0bcc2889558970d9a685b3249565a937cd820ad34/ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92", size = 11819133, upload-time = "2025-07-24T13:25:56.369Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/fc/c6b65cd0e7fbe60f17e7ad619dca796aa49fbca34bb9bea5f8faf1ec2643/ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a", size = 12501114, upload-time = "2025-07-24T13:25:59.471Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/de/c6bec1dce5ead9f9e6a946ea15e8d698c35f19edc508289d70a577921b30/ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf", size = 11716873, upload-time = "2025-07-24T13:26:01.496Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/16/cf372d2ebe91e4eb5b82a2275c3acfa879e0566a7ac94d331ea37b765ac8/ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73", size = 11958829, upload-time = "2025-07-24T13:26:03.721Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/25/bf/cd07e8f6a3a6ec746c62556b4c4b79eeb9b0328b362bb8431b7b8afd3856/ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac", size = 11626619, upload-time = "2025-07-24T13:26:06.118Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/c9/c2ccb3b8cbb5661ffda6925f81a13edbb786e623876141b04919d1128370/ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08", size = 13221894, upload-time = "2025-07-24T13:26:08.292Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6b/58/68a5be2c8e5590ecdad922b2bcd5583af19ba648f7648f95c51c3c1eca81/ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4", size = 14163909, upload-time = "2025-07-24T13:26:10.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/d1/ef6b19622009ba8386fdb792c0743f709cf917b0b2f1400589cbe4739a33/ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b", size = 13583652, upload-time = "2025-07-24T13:26:13.381Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/62/e3/1c98c566fe6809a0c83751d825a03727f242cdbe0d142c9e292725585521/ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a", size = 12700451, upload-time = "2025-07-24T13:26:15.488Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/24/ff/96058f6506aac0fbc0d0fc0d60b0d0bd746240a0594657a2d94ad28033ba/ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a", size = 12937465, upload-time = "2025-07-24T13:26:17.808Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/d3/68bc5e7ab96c94b3589d1789f2dd6dd4b27b263310019529ac9be1e8f31b/ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5", size = 11771136, upload-time = "2025-07-24T13:26:20.422Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/75/7356af30a14584981cabfefcf6106dea98cec9a7af4acb5daaf4b114845f/ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293", size = 11601644, upload-time = "2025-07-24T13:26:22.928Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/67/91c71d27205871737cae11025ee2b098f512104e26ffd8656fd93d0ada0a/ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb", size = 12478068, upload-time = "2025-07-24T13:26:26.134Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/04/b6b00383cf2f48e8e78e14eb258942fdf2a9bf0287fbf5cdd398b749193a/ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb", size = 12991537, upload-time = "2025-07-24T13:26:28.533Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/b9/053d6445dc7544fb6594785056d8ece61daae7214859ada4a152ad56b6e0/ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9", size = 11751575, upload-time = "2025-07-24T13:26:30.835Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/0f/ab16e8259493137598b9149734fec2e06fdeda9837e6f634f5c4e35916da/ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5", size = 12882273, upload-time = "2025-07-24T13:26:32.929Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564, upload-time = "2025-07-24T13:26:34.994Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" },
|
||||
]
|
||||
|
||||
[[package]]