Merge remote-tracking branch 'origin/main' into stores
This commit is contained in: commit 490b212576
89 changed files with 19353 additions and 8323 deletions
.dockerignore (Normal file, 19 lines)

@@ -0,0 +1,19 @@
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.so
.git
.gitignore
htmlcov*
.coverage
coverage*
.cache
.mypy_cache
.pytest_cache
.ruff_cache
uv.lock
node_modules
build
/tmp
@@ -57,7 +57,7 @@ runs:
echo "Building Llama Stack"

LLAMA_STACK_DIR=. \
uv run --no-sync llama stack build --template ci-tests --image-type venv
uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

- name: Configure git for commits
shell: bash
.github/workflows/README.md (vendored, 1 line)

@@ -14,6 +14,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
| Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
| Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
| Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
| Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
| Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |
.github/workflows/install-script-ci.yml (vendored, 7 lines)

@@ -30,8 +30,11 @@ jobs:

- name: Build a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
llama stack build --template starter --image-type container --image-name test
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=starter \
--tag llama-stack:starter-ci

- name: Run installer end-to-end
run: |
.github/workflows/integration-tests.yml (vendored, 4 lines)

@@ -47,7 +47,7 @@ jobs:
strategy:
fail-fast: false
matrix:
client-type: [library, server]
client-type: [library, server, docker]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}

@@ -82,7 +82,7 @@ jobs:
env:
OPENAI_API_KEY: dummy
with:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
setup: ${{ matrix.config.setup }}
inference-mode: 'replay'
suite: ${{ matrix.config.suite }}
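The chained `&&`/`||` expression in the new `stack-config` line is the GitHub Actions idiom for a three-way selection. As a plain-shell sketch of the same logic (illustrative only, not part of the workflow):

```bash
# Rough shell equivalent of the chained &&/|| expression used for stack-config above
client_type=library   # one of: library, server, docker
case "$client_type" in
  library) stack_config="ci-tests" ;;
  server)  stack_config="server:ci-tests" ;;
  *)       stack_config="docker:ci-tests" ;;
esac
echo "$stack_config"
```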
@@ -144,7 +144,7 @@ jobs:

- name: Build Llama Stack
run: |
uv run --no-sync llama stack build --template ci-tests --image-type venv
uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install

- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
.github/workflows/providers-build.yml (vendored, 57 lines)

@@ -14,6 +14,8 @@ on:
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'

pull_request:
paths:

@@ -24,6 +26,8 @@ on:
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'

concurrency:
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}

@@ -60,15 +64,19 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Print build dependencies
- name: Install distribution into venv
if: matrix.image-type == 'venv'
run: |
uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

- name: Run Llama Stack Build
- name: Build container image
if: matrix.image-type == 'container'
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=${{ matrix.distro }} \
--tag llama-stack:${{ matrix.distro }}-ci

- name: Print dependencies in the image
if: matrix.image-type == 'venv'

@@ -86,8 +94,8 @@ jobs:

- name: Build a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama

uv pip install -e .
uv run --no-sync llama stack list-deps --providers inference=remote::ollama | xargs -L1 uv pip install

build-custom-container-distribution:
runs-on: ubuntu-latest
steps:

@@ -97,11 +105,16 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Build a single provider
- name: Build container image
run: |
yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests

- name: Inspect the container image entrypoint
run: |

@@ -112,7 +125,7 @@ jobs:
fi
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
echo "Entrypoint is not correct"
exit 1
fi

@@ -129,17 +142,19 @@ jobs:
- name: Pin distribution to UBI9 base
run: |
yq -i '
.image_type = "container" |
.image_name = "ubi9-test" |
.distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
' llama_stack/distributions/ci-tests/build.yaml

- name: Build dev container (UBI9)
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
- name: Build UBI9 container image
run: |
uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests-ubi9

- name: Inspect UBI9 image
run: |

@@ -150,7 +165,7 @@ jobs:
fi
entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
echo "Entrypoint: $entrypoint"
if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
echo "Entrypoint is not correct"
exit 1
fi
.github/workflows/providers-list-deps.yml (vendored, Normal file, 105 lines)

@@ -0,0 +1,105 @@
name: Test llama stack list-deps

run-name: Test llama stack list-deps

on:
push:
branches:
- main
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'

pull_request:
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
distros: ${{ steps.set-matrix.outputs.distros }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Generate Distribution List
id: set-matrix
run: |
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"

list-deps:
needs: generate-matrix
runs-on: ubuntu-latest
strategy:
matrix:
distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
image-type: [venv, container]
fail-fast: false # We want to run all jobs even if some fail

steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Print dependencies
run: |
uv run llama stack list-deps ${{ matrix.distro }}

- name: Install Distro using llama stack list-deps
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install

- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
uv pip list

show-single-provider:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: Show a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps --providers inference=remote::ollama

list-deps-from-config:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Install dependencies
uses: ./.github/actions/setup-runner

- name: list-deps from Config
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
run: |
uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
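Outside CI, the same check can be reproduced from a llama-stack checkout with `uv` installed; `starter` below is just an example distribution name:

```bash
# Print the resolved dependency list for one distribution
uv run llama stack list-deps starter

# Install that list into the active environment, one package per line
uv run llama stack list-deps starter | xargs -L1 uv pip install
```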
@@ -46,9 +46,9 @@ jobs:
yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
cat tests/external/ramalama-stack/run.yaml

- name: Build distro from config file
- name: Install distribution dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
uv run llama stack list-deps tests/external/ramalama-stack/build.yaml | xargs -L1 uv pip install

- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'
.github/workflows/test-external.yml (vendored, 7 lines)

@@ -44,11 +44,14 @@ jobs:

- name: Print distro dependencies
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
uv run --no-sync llama stack list-deps tests/external/build.yaml

- name: Build distro from config file
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
uv venv ci-test
source ci-test/bin/activate
uv pip install -e .
LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install

- name: Start Llama Stack server in background
if: ${{ matrix.image-type }} == 'venv'
@@ -167,9 +167,9 @@ under the LICENSE file in the root directory of this source tree.

Some tips about common tasks you work on while contributing to Llama Stack:

### Using `llama stack build`
### Installing dependencies of distributions

Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
When installing dependencies for a distribution, you can use `llama stack list-deps` to view and install the required packages.

Example:
```bash

@@ -177,7 +177,12 @@ cd work/
git clone https://github.com/llamastack/llama-stack.git
git clone https://github.com/llamastack/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>

# Show dependencies for a distribution
llama stack list-deps <distro-name>

# Install dependencies
llama stack list-deps <distro-name> | xargs -L1 uv pip install
```

### Updating distribution configurations
@@ -27,8 +27,11 @@ MODEL="Llama-4-Scout-17B-16E-Instruct"
# get meta url from llama.com
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

# install dependencies for the distribution
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install

# start a llama stack server
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu

# install client to interact with the server
pip install llama-stack-client

@@ -89,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
To try Llama Stack locally, run:

```bash
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
```

### Overview
containers/Containerfile (Normal file, 136 lines)

@@ -0,0 +1,136 @@
# syntax=docker/dockerfile:1.6
#
# This Dockerfile is used to build the Llama Stack container image.
# Example:
#   docker build \
#     -f containers/Containerfile \
#     --build-arg DISTRO_NAME=starter \
#     --tag llama-stack:starter .

ARG BASE_IMAGE=python:3.12-slim
FROM ${BASE_IMAGE}

ARG INSTALL_MODE="pypi"
ARG LLAMA_STACK_DIR="/workspace"
ARG LLAMA_STACK_CLIENT_DIR=""
ARG PYPI_VERSION=""
ARG TEST_PYPI_VERSION=""
ARG KEEP_WORKSPACE=""
ARG DISTRO_NAME="starter"
ARG RUN_CONFIG_PATH=""
ARG UV_HTTP_TIMEOUT=500
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app

RUN set -eux; \
    if command -v dnf >/dev/null 2>&1; then \
        dnf -y update && \
        dnf install -y iputils git net-tools wget \
            vim-minimal python3.12 python3.12-pip python3.12-wheel \
            python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
        ln -sf /usr/bin/pip3.12 /usr/local/bin/pip && \
        ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
        dnf clean all; \
    elif command -v apt-get >/dev/null 2>&1; then \
        apt-get update && \
        apt-get install -y --no-install-recommends \
            iputils-ping net-tools iproute2 dnsutils telnet \
            curl wget git procps psmisc lsof traceroute bubblewrap \
            gcc g++ && \
        rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Unsupported base image: expected dnf or apt-get" >&2; \
        exit 1; \
    fi

RUN pip install --no-cache-dir uv
ENV UV_SYSTEM_PYTHON=1

ENV INSTALL_MODE=${INSTALL_MODE}
ENV LLAMA_STACK_DIR=${LLAMA_STACK_DIR}
ENV LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR}
ENV PYPI_VERSION=${PYPI_VERSION}
ENV TEST_PYPI_VERSION=${TEST_PYPI_VERSION}
ENV KEEP_WORKSPACE=${KEEP_WORKSPACE}
ENV DISTRO_NAME=${DISTRO_NAME}
ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}

# Copy the repository so editable installs and run configurations are available.
COPY . /workspace

# Install llama-stack
RUN set -eux; \
    if [ "$INSTALL_MODE" = "editable" ]; then \
        if [ ! -d "$LLAMA_STACK_DIR" ]; then \
            echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
    elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
        uv pip install --no-cache-dir fastapi libcst; \
        if [ -n "$TEST_PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
        fi; \
    else \
        if [ -n "$PYPI_VERSION" ]; then \
            uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
        else \
            uv pip install --no-cache-dir llama-stack; \
        fi; \
    fi;

# Install the client package if it is provided
RUN set -eux; \
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
        if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
            exit 1; \
        fi; \
        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
    fi;

# Install the dependencies for the distribution
RUN set -eux; \
    if [ -z "$DISTRO_NAME" ]; then \
        echo "DISTRO_NAME must be provided" >&2; \
        exit 1; \
    fi; \
    deps="$(llama stack list-deps "$DISTRO_NAME")"; \
    if [ -n "$deps" ]; then \
        printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
    fi

# Cleanup
RUN set -eux; \
    pip uninstall -y uv; \
    should_remove=1; \
    if [ -n "$KEEP_WORKSPACE" ]; then should_remove=0; fi; \
    if [ "$INSTALL_MODE" = "editable" ]; then should_remove=0; fi; \
    case "$RUN_CONFIG_PATH" in \
        /workspace*) should_remove=0 ;; \
    esac; \
    if [ "$should_remove" -eq 1 ] && [ -d /workspace ]; then rm -rf /workspace; fi

RUN cat <<'EOF' >/usr/local/bin/llama-stack-entrypoint.sh
#!/bin/sh
set -e

if [ -n "$RUN_CONFIG_PATH" ] && [ -f "$RUN_CONFIG_PATH" ]; then
  exec llama stack run "$RUN_CONFIG_PATH" "$@"
fi

if [ -n "$DISTRO_NAME" ]; then
  exec llama stack run "$DISTRO_NAME" "$@"
fi

exec llama stack run "$@"
EOF
RUN chmod +x /usr/local/bin/llama-stack-entrypoint.sh

RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache

ENTRYPOINT ["/usr/local/bin/llama-stack-entrypoint.sh"]
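Taken together, a plausible local invocation of this Containerfile looks like the following; the distribution name, port, and volume mount are illustrative choices rather than anything mandated by the file itself:

```bash
# Build an image that bakes in the starter distribution's dependencies
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter

# The entrypoint script runs `llama stack run` with the baked distro (or RUN_CONFIG_PATH);
# any extra arguments, such as --port, are passed through to it
docker run -p 8321:8321 -v ~/.llama:/.llama llama-stack:starter --port 8321
```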
@@ -51,8 +51,8 @@ device: cpu
You can access the HuggingFace trainer via the `starter` distribution:

```bash
llama stack build --distro starter --image-type venv
llama stack run ~/.llama/distributions/starter/starter-run.yaml
llama stack list-deps starter | xargs -L1 uv pip install
llama stack run starter
```

### Usage Example
@@ -175,8 +175,7 @@ llama-stack-client benchmarks register \
**1. Start the Llama Stack API Server**

```bash
# Build and run a distribution (example: together)
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

@@ -209,7 +208,7 @@ The playground works with any Llama Stack distribution. Popular options include:
<TabItem value="together" label="Together AI">

```bash
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

@@ -222,7 +221,7 @@ llama stack run together
<TabItem value="ollama" label="Ollama (Local)">

```bash
llama stack build --distro ollama --image-type venv
llama stack list-deps ollama | xargs -L1 uv pip install
llama stack run ollama
```

@@ -235,7 +234,7 @@ llama stack run ollama
<TabItem value="meta-reference" label="Meta Reference">

```bash
llama stack build --distro meta-reference --image-type venv
llama stack list-deps meta-reference | xargs -L1 uv pip install
llama stack run meta-reference
```
@@ -20,7 +20,8 @@ RAG enables your applications to reference and recall information from external
In one terminal, start the Llama Stack server:

```bash
uv run llama stack build --distro starter --image-type venv --run
llama stack list-deps starter | xargs -L1 uv pip install
llama stack run starter
```

### 2. Connect with OpenAI Client
@@ -62,6 +62,10 @@ The new `/v2` API must be introduced alongside the existing `/v1` API and run in

When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.

### Deprecated APIs

Deprecated APIs are those that are no longer actively maintained or supported. Deprecated APIs are marked with the flag `deprecated = True` in the OpenAPI spec. These APIs will be removed in a future release.

### API Stability vs. Provider Stability

The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.
@@ -158,17 +158,16 @@ under the LICENSE file in the root directory of this source tree.

Some tips about common tasks you work on while contributing to Llama Stack:

### Using `llama stack build`
### Setup for development

Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.

Example:
```bash
cd work/
git clone https://github.com/meta-llama/llama-stack.git
git clone https://github.com/meta-llama/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
uv run llama stack list-deps <distro-name> | xargs -L1 uv pip install

# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package.
git clone https://github.com/meta-llama/llama-stack-client-python.git
uv add --editable ../llama-stack-client-python
```

### Updating distribution configurations
@@ -67,7 +67,7 @@ def get_base_url(self) -> str:

## Testing the Provider

Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.

### 1. Integration Testing
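A sketch of what the test run itself can look like once dependencies are installed; the `--stack-config` flag is an assumption inferred from the `stack-config` input used by the CI workflows above, so check the testing docs for the exact invocation:

```bash
# Install the distribution's dependencies first
llama stack list-deps together | xargs -L1 uv pip install

# Then run the integration suite from tests/integration against that stack
uv run pytest tests/integration --stack-config=together
```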
@@ -5,225 +5,79 @@ sidebar_label: Build your own Distribution
sidebar_position: 3
---

This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
This guide walks you through inspecting existing distributions, customising their configuration, and building runnable artefacts for your own deployment.

### Explore existing distributions

### Setting your log level
All first-party distributions live under `llama_stack/distributions/`. Each directory contains:

In order to specify the proper logging level users can apply the following environment variable `LLAMA_STACK_LOGGING` with the following format:
- `build.yaml` – the distribution specification (providers, additional dependencies, optional external provider directories).
- `run.yaml` – sample run configuration (when provided).
- Documentation fragments that power this site.

`LLAMA_STACK_LOGGING=server=debug;core=info`

Where each category in the following list:

- all
- core
- server
- router
- inference
- agents
- safety
- eval
- tools
- client

Can be set to any of the following log levels:

- debug
- info
- warning
- error
- critical

The default global log level is `info`. `all` sets the log level for all components.

A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`

### Llama Stack Build

In order to build your own distribution, we recommend you clone the `llama-stack` repository.

```
git clone git@github.com:meta-llama/llama-stack.git
cd llama-stack
pip install -e .
```
Use the CLI to build your distribution.
The main points to consider are:
1. **Image Type** - Do you want a venv environment or a Container (e.g. Docker)
2. **Template** - Do you want to use a template to build your distribution? or start from scratch?
3. **Config** - Do you want to use a pre-existing config file to build your distribution?

```
llama stack build -h
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only]
                         [--run] [--providers PROVIDERS]

Build a Llama stack container

options:
  -h, --help            show this help message and exit
  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to
                        enter information interactively (default: None)
  --template TEMPLATE   (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default:
                        None)
  --distro DISTRIBUTION, --distribution DISTRIBUTION
                        Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
  --list-distros, --list-distributions
                        Show the available distributions for building a Llama Stack distribution (default: False)
  --image-type {container,venv}
                        Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
  --image-name IMAGE_NAME
                        [for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default:
                        None)
  --print-deps-only     Print the dependencies for the stack only, without building the stack (default: False)
  --run                 Run the stack after building using the same image type, name, and other applicable arguments (default: False)
  --providers PROVIDERS
                        Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per
                        API. (default: None)
```

After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<Tabs>
<TabItem value="template" label="Building from a template">
To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
<TabItem value="container" label="Building a container">

The following command will allow you to see the available templates and their corresponding providers.
```
llama stack build --list-templates
Use the Containerfile at `containers/Containerfile`, which installs `llama-stack`, resolves distribution dependencies via `llama stack list-deps`, and sets the entrypoint to `llama stack run`.

```bash
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter
```

```
+------------------------------+-----------------------------------------------------------------------------+
| Template Name                | Description                                                                 |
+------------------------------+-----------------------------------------------------------------------------+
| watsonx                      | Use watsonx for running LLM inference                                       |
+------------------------------+-----------------------------------------------------------------------------+
| vllm-gpu                     | Use a built-in vLLM engine for running LLM inference                        |
+------------------------------+-----------------------------------------------------------------------------+
| together                     | Use Together.AI for running LLM inference                                   |
+------------------------------+-----------------------------------------------------------------------------+
| tgi                          | Use (an external) TGI server for running LLM inference                      |
+------------------------------+-----------------------------------------------------------------------------+
| starter                      | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| sambanova                    | Use SambaNova for running LLM inference and safety                          |
+------------------------------+-----------------------------------------------------------------------------+
| remote-vllm                  | Use (an external) vLLM server for running LLM inference                     |
+------------------------------+-----------------------------------------------------------------------------+
| postgres-demo                | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| passthrough                  | Use Passthrough hosted llama-stack endpoint for LLM inference               |
+------------------------------+-----------------------------------------------------------------------------+
| open-benchmark               | Distribution for running open benchmarks                                    |
+------------------------------+-----------------------------------------------------------------------------+
| ollama                       | Use (an external) Ollama server for running LLM inference                   |
+------------------------------+-----------------------------------------------------------------------------+
| nvidia                       | Use NVIDIA NIM for running LLM inference, evaluation and safety             |
+------------------------------+-----------------------------------------------------------------------------+
| meta-reference-gpu           | Use Meta Reference for running LLM inference                                |
+------------------------------+-----------------------------------------------------------------------------+
| llama_api                    | Distribution for running e2e tests in CI                                    |
+------------------------------+-----------------------------------------------------------------------------+
| hf-serverless                | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| hf-endpoint                  | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| groq                         | Use Groq for running LLM inference                                          |
+------------------------------+-----------------------------------------------------------------------------+
| fireworks                    | Use Fireworks.AI for running LLM inference                                  |
+------------------------------+-----------------------------------------------------------------------------+
| experimental-post-training   | Experimental template for post training                                     |
+------------------------------+-----------------------------------------------------------------------------+
| dell                         | Dell's distribution of Llama Stack. TGI inference via Dell's custom         |
|                              | container                                                                   |
+------------------------------+-----------------------------------------------------------------------------+
| ci-tests                     | Distribution for running e2e tests in CI                                    |
+------------------------------+-----------------------------------------------------------------------------+
| cerebras                     | Use Cerebras for running LLM inference                                      |
+------------------------------+-----------------------------------------------------------------------------+
| bedrock                      | Use AWS Bedrock for running LLM inference and safety                        |
+------------------------------+-----------------------------------------------------------------------------+
```
Handy build arguments:

You may then pick a template to build your distribution with providers fitted to your liking.
- `DISTRO_NAME` – distribution directory name (defaults to `starter`).
- `RUN_CONFIG_PATH` – absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
- `INSTALL_MODE=editable` – install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
- `LLAMA_STACK_CLIENT_DIR` – optional editable install of the Python client.
- `PYPI_VERSION` / `TEST_PYPI_VERSION` – pin specific releases when not using editable installs.
- `KEEP_WORKSPACE=1` – retain `/workspace` in the final image if you need to access additional files (such as sample configs or provider bundles).
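For instance, a build that combines several of these arguments (mirroring the CI invocation for the `ci-tests` distribution elsewhere in this change) could look like this:

```bash
docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=ci-tests \
  --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
  --tag llama-stack:ci-tests
```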
For example, to build a distribution with TGI as the inference provider, you can run:
```
$ llama stack build --distro starter
...
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
```
Make sure any custom `build.yaml`, run configs, or provider directories you reference are included in the Docker build context so the Containerfile can read them.

```{tip}
The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
```
</TabItem>
<TabItem value="scratch" label="Building from Scratch">
<TabItem value="external" label="Building with external providers">

If the provided templates do not fit your use case, you could start off by running `llama stack build`, which will launch an interactive wizard where you will be prompted to enter build configurations.
External providers live outside the main repository but can be bundled by pointing `external_providers_dir` to a directory that contains your provider packages.

It would be best to start with a template and understand the structure of the config file and the various concepts (APIs, providers, resources, etc.) before starting from scratch.
```
llama stack build
1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
2. Update `build.yaml` with the directory and provider entries.
3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.

> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
> Enter the image type you want your Llama Stack to be built as (container or venv): venv

Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.

Tip: use <TAB> to see options for the providers.

> Enter provider for API inference: inline::meta-reference
> Enter provider for API safety: inline::llama-guard
> Enter provider for API agents: inline::meta-reference
> Enter provider for API memory: inline::faiss
> Enter provider for API datasetio: inline::meta-reference
> Enter provider for API scoring: inline::meta-reference
> Enter provider for API eval: inline::meta-reference
> Enter provider for API telemetry: inline::meta-reference

> (Optional) Enter a short description for your Llama Stack:

You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
```
</TabItem>
<TabItem value="config" label="Building from a pre-existing build config file">
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.

- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.

```
llama stack build --config llama_stack/distributions/starter/build.yaml
```
</TabItem>
<TabItem value="external" label="Building with External Providers">

Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.

To build a distribution with external providers, you need to:

1. Configure the `external_providers_dir` in your build configuration file:
Example `build.yaml` excerpt for a custom Ollama provider:

```yaml
# Example my-external-stack.yaml with external providers
version: '2'
distribution_spec:
  description: Custom distro for CI tests
  providers:
    inference:
      - remote::custom_ollama
# Add more providers as needed
image_type: container
image_name: ci-test
# Path to external provider implementations
external_providers_dir: ~/.llama/providers.d
      - remote::custom_ollama
external_providers_dir: /workspace/providers.d
```

Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

```python
from llama_stack.providers.datatypes import ProviderSpec


def get_provider_spec() -> ProviderSpec:
    return ProviderSpec(
        provider_type="remote::custom_ollama",
        module="llama_stack_ollama_provider",
        config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
        pip_packages=[
            "ollama",
            "aiohttp",
            "llama-stack-provider-ollama",
        ],
    )
```

Here's an example for a custom Ollama provider:
@@ -232,9 +86,9 @@ Here's an example for a custom Ollama provider:
adapter:
adapter_type: custom_ollama
pip_packages:
- ollama
- aiohttp
- llama-stack-provider-ollama # This is the provider package
  - ollama
  - aiohttp
  - llama-stack-provider-ollama # This is the provider package
config_class: llama_stack_ollama_provider.config.OllamaImplConfig
module: llama_stack_ollama_provider
api_dependencies: []
@@ -245,53 +99,22 @@ The `pip_packages` section lists the Python packages required by the provider, a
provider package itself. The package must be available on PyPI or can be provided from a local
directory or a git repository (git must be installed on the build environment).

2. Build your distribution using the config file:
For deeper guidance, see the [External Providers documentation](../providers/external/).

```
llama stack build --config my-external-stack.yaml
```

For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external/).
</TabItem>
<TabItem value="container" label="Building Container">
</Tabs>

:::tip Podman Alternative
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
:::
### Run your stack server

To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.

```
llama stack build --distro starter --image-type container
```

```
$ llama stack build --distro starter --image-type container
...
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/Containerfile
FROM python:3.10-slim
...
```

You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
```

Now set some environment variables for the inference model ID and Llama Stack Port and create a local directory to mount into the container's file system.
After building the image, launch it directly with Docker or Podman—the entrypoint calls `llama stack run` using the baked distribution or the bundled run config:

```bash
export INFERENCE_MODEL="llama3.2:3b"
export LLAMA_STACK_PORT=8321
mkdir -p ~/.llama
```

After this step is successful, you should be able to find the built container image and test it with the below Docker command:

```
docker run -d \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -e INFERENCE_MODEL=$INFERENCE_MODEL \
  -e OLLAMA_URL=http://host.docker.internal:11434 \
  localhost/distribution-ollama:dev \
  llama-stack:starter \
  --port $LLAMA_STACK_PORT
```
@@ -311,131 +134,14 @@ Here are the docker flags and their uses:

* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on

</TabItem>
</Tabs>

### Running your Stack server
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack build` step.
If you prepared a custom run config, mount it into the container and reference it explicitly:

```bash
docker run \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $(pwd)/run.yaml:/app/run.yaml \
  llama-stack:starter \
  /app/run.yaml
```
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                       [--image-type {venv}] [--enable-ui]
                       [config | distro]

Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

positional arguments:
  config | distro       Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

options:
  -h, --help            show this help message and exit
  --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
  --image-name IMAGE_NAME
                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
  --image-type {venv}
                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
  --enable-ui           Start the UI server (default: False)
```

**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.

```
# Start using template name
llama stack run tgi

# Start using config file
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```

```
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

Serving API inspect
 GET /health
 GET /providers/list
 GET /routes/list
Serving API inference
 POST /inference/chat_completion
 POST /inference/completion
 POST /inference/embeddings
...
Serving API agents
 POST /agents/create
 POST /agents/session/create
 POST /agents/turn/create
 POST /agents/delete
 POST /agents/session/delete
 POST /agents/session/get
 POST /agents/step/get
 POST /agents/turn/get

Listening on ['::', '0.0.0.0']:8321
INFO:     Started server process [2935911]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO:     2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```

### Listing Distributions
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.

```
llama stack list -h
usage: llama stack list [-h]

list the build stacks

options:
  -h, --help  show this help message and exit
```

Example Usage

```
llama stack list
```

```
+------------------------------+-----------------------------------------------------------------+--------------+------------+
| Stack Name                   | Path                                                            | Build Config | Run Config |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| together                     | ~/.llama/distributions/together                                 | Yes          | No         |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| bedrock                      | ~/.llama/distributions/bedrock                                  | Yes          | No         |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| starter                      | ~/.llama/distributions/starter                                  | Yes          | Yes        |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| remote-vllm                  | ~/.llama/distributions/remote-vllm                              | Yes          | Yes        |
+------------------------------+-----------------------------------------------------------------------------+--------------+
```

### Removing a Distribution
Use the remove command to delete a distribution you've previously built.

```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]

Remove the build stack

positional arguments:
  name        Name of the stack to delete (default: None)

options:
  -h, --help  show this help message and exit
  --all, -a   Delete all stacks (use with caution) (default: False)
```

Example
```
llama stack rm llamastack-test
```

To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.

### Troubleshooting

If you encounter any issues, ask questions in our discord or search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file a new issue.
@@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.
```bash
# setup
uv pip install llama-stack
llama stack build --distro starter --image-type venv
llama stack list-deps starter | xargs -L1 uv pip install
```

```python
@@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
uv venv starter --python 3.12
source starter/bin/activate # On Windows: starter\Scripts\activate
pip install --no-cache llama-stack==0.2.2
llama stack build --distro starter --image-type venv
llama stack list-deps starter | xargs -L1 uv pip install
export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run starter --port 5050
```
@@ -166,10 +166,10 @@ docker run \

### Via venv

Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
Install the distribution dependencies before launching:

```bash
llama stack build --distro dell --image-type venv
llama stack list-deps dell | xargs -L1 uv pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \
@@ -81,10 +81,10 @@ docker run \

### Via venv

Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
Make sure you have the Llama Stack CLI available.

```bash
llama stack build --distro meta-reference-gpu --image-type venv
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/meta-reference-gpu/run.yaml \
  --port 8321
@@ -136,11 +136,11 @@ docker run \

### Via venv

If you've set up your local development environment, you can also build the image using your local virtual environment.
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \
@@ -169,7 +169,11 @@ docker run \
Ensure you have configured the starter distribution using the environment variables explained above.

```bash
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
# Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install

# Run the server
uv run --with llama-stack llama stack run starter
```

## Example Usage
@ -23,6 +23,17 @@ Another simple way to start interacting with Llama Stack is to just spin up a co
|
|||
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally. See [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.
|
||||
|
||||
|
||||
## Configure logging
|
||||
|
||||
Control log output via environment variables before starting the server.
|
||||
|
||||
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
|
||||
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
|
||||
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
|
||||
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
|
||||
|
||||
Export these variables prior to running `llama stack run`, launching a container, or starting the server through any other pathway.
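For example, a typical invocation might combine both variables as in the sketch below; the log file path is only illustrative, and the categories you raise to `debug` will depend on what you are troubleshooting:

```bash
# Verbose server and core logs, everything else at the default level
export LLAMA_STACK_LOGGING="server=debug;core=info"
# Mirror all output to a file as well as stdout (path is illustrative)
export LLAMA_STACK_LOG_FILE=/tmp/llama-stack.log
llama stack run starter
```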
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 1
|
||||
:hidden:
|
||||
|
|
|
|||
|
|
@ -58,15 +58,19 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th
|
|||
|
||||
<Tabs>
|
||||
<TabItem value="venv" label="Using venv">
|
||||
You can use Python to build and run the Llama Stack server, which is useful for testing and development.
|
||||
You can use Python to install dependencies and run the Llama Stack server, which is useful for testing and development.
|
||||
|
||||
Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
|
||||
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
|
||||
Now let's build and run the Llama Stack config for Ollama.
|
||||
Now let's install dependencies and run the Llama Stack config for Ollama.
|
||||
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.
|
||||
|
||||
```bash
|
||||
llama stack build --distro starter --image-type venv --run
|
||||
# Install dependencies for the starter distribution
|
||||
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||
|
||||
# Run the server
|
||||
llama stack run starter
|
||||
```
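For example, to enable the Ollama provider you can pass its URL as an environment variable when starting the server. This sketch assumes Ollama is listening on its default local port:

```bash
OLLAMA_URL=http://localhost:11434 llama stack run starter
```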
|
||||
</TabItem>
|
||||
<TabItem value="container" label="Using a Container">
|
||||
|
|
@ -304,7 +308,7 @@ stream = agent.create_turn(
|
|||
for event in AgentEventLogger().log(stream):
|
||||
event.print()
|
||||
```
|
||||
### ii. Run the Script
|
||||
#### ii. Run the Script
|
||||
Let's run the script using `uv`
|
||||
```bash
|
||||
uv run python agent.py
|
||||
|
|
|
|||
|
|
@ -24,10 +24,13 @@ ollama run llama3.2:3b --keepalive 60m
|
|||
|
||||
#### Step 2: Run the Llama Stack server
|
||||
|
||||
We will use `uv` to run the Llama Stack server.
|
||||
We will use `uv` to install dependencies and run the Llama Stack server.
|
||||
```bash
|
||||
OLLAMA_URL=http://localhost:11434 \
|
||||
uv run --with llama-stack llama stack build --distro starter --image-type venv --run
|
||||
# Install dependencies for the starter distribution
|
||||
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||
|
||||
# Run the server
|
||||
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
|
||||
```
|
||||
#### Step 3: Run the demo
|
||||
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
---
|
||||
description: "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
description: "Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
sidebar_label: Eval
|
||||
title: Eval
|
||||
---
|
||||
|
|
@ -8,6 +10,8 @@ title: Eval
|
|||
|
||||
## Overview
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
|
||||
This section contains documentation for all available providers for the **eval** API.
|
||||
|
|
|
|||
|
|
@ -240,6 +240,6 @@ additional_pip_packages:
|
|||
- sqlalchemy[asyncio]
|
||||
```
|
||||
|
||||
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
|
||||
No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.
|
||||
|
||||
The provider will now be available in Llama Stack with the type `remote::ramalama`.
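Putting it together, the workflow for a distribution that includes this provider might look like the sketch below; `my-distro` is a placeholder for whichever distribution references the `remote::ramalama` provider:

```bash
# Resolve and install everything the distribution needs, including the external provider module
llama stack list-deps my-distro | xargs -L1 uv pip install

# Start the stack; the provider module is loaded at startup
llama stack run my-distro
```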
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -2864,7 +2864,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
|
||||
"!llama stack list-deps experimental-post-training | xargs -L1 uv pip install"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
"source": [
|
||||
"# NBVAL_SKIP\n",
|
||||
"!pip install -U llama-stack\n",
|
||||
"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
|
||||
"llama stack list-deps fireworks | xargs -L1 uv pip install\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -136,7 +136,8 @@
|
|||
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
|
||||
" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
|
||||
" \"uv run --with llama-stack llama stack run starter\",\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
|
|
@ -172,7 +173,7 @@
|
|||
"\n",
|
||||
"def kill_llama_stack_server():\n",
|
||||
" # Kill any existing llama stack server processes using pkill command\n",
|
||||
" os.system(\"pkill -f llama_stack.core.server.server\")"
|
||||
" os.system(\"pkill -f llama_stack.core.server.server\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -105,7 +105,8 @@
|
|||
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
|
||||
" \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
|
||||
" \"uv run --with llama-stack llama stack run starter\",\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"```bash\n",
|
||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@
|
|||
"metadata": {},
|
||||
"source": [
|
||||
"```bash\n",
|
||||
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
|
||||
"uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,366 +1,366 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1e7571c",
|
||||
"metadata": {
|
||||
"id": "c1e7571c"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
|
||||
"\n",
|
||||
"# Llama Stack - Building AI Applications\n",
|
||||
"\n",
|
||||
"<img src=\"https://llamastack.github.io/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
|
||||
"\n",
|
||||
"Get started with Llama Stack in minutes!\n",
|
||||
"\n",
|
||||
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
|
||||
"\n",
|
||||
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
|
||||
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4CV1Q19BDMVw",
|
||||
"metadata": {
|
||||
"id": "4CV1Q19BDMVw"
|
||||
},
|
||||
"source": [
|
||||
"## Step 1: Install and setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "K4AvfUAJZOeS",
|
||||
"metadata": {
|
||||
"id": "K4AvfUAJZOeS"
|
||||
},
|
||||
"source": [
|
||||
"### 1.1. Install uv and test inference with Ollama\n",
|
||||
"\n",
|
||||
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a2d7b85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install uv llama_stack llama-stack-client\n",
|
||||
"\n",
|
||||
"## If running on Collab:\n",
|
||||
"# !pip install colab-xterm\n",
|
||||
"# %load_ext colabxterm\n",
|
||||
"\n",
|
||||
"!curl https://ollama.ai/install.sh | sh"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39fa584b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1.2. Test inference with Ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3bf81522",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a7e8e0f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## If running on Colab:\n",
|
||||
"# %xterm\n",
|
||||
"\n",
|
||||
"## To be ran in the terminal:\n",
|
||||
"# ollama serve &\n",
|
||||
"# ollama run llama3.2:3b --keepalive 60m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f3c5f243",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If successful, you should see the model respond to a prompt.\n",
|
||||
"\n",
|
||||
"...\n",
|
||||
"```\n",
|
||||
">>> hi\n",
|
||||
"Hello! How can I assist you today?\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "oDUB7M_qe-Gs",
|
||||
"metadata": {
|
||||
"id": "oDUB7M_qe-Gs"
|
||||
},
|
||||
"source": [
|
||||
"## Step 2: Run the Llama Stack server\n",
|
||||
"\n",
|
||||
"In this showcase, we will start a Llama Stack server that is running locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "732eadc6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.1. Setup the Llama Stack Server"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"collapsed": true,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
||||
"!uv run --with llama-stack llama stack build --distro starter\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
" text=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
||||
" return process\n",
|
||||
"\n",
|
||||
"def wait_for_server_to_start():\n",
|
||||
" import requests\n",
|
||||
" from requests.exceptions import ConnectionError\n",
|
||||
" import time\n",
|
||||
"\n",
|
||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
||||
" max_retries = 30\n",
|
||||
" retry_interval = 1\n",
|
||||
"\n",
|
||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
||||
" for _ in range(max_retries):\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" print(\"\\nServer is ready!\")\n",
|
||||
" return True\n",
|
||||
" except ConnectionError:\n",
|
||||
" print(\".\", end=\"\", flush=True)\n",
|
||||
" time.sleep(retry_interval)\n",
|
||||
"\n",
|
||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# use this helper if needed to kill the server\n",
|
||||
"def kill_llama_stack_server():\n",
|
||||
" # Kill any existing llama stack server processes\n",
|
||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c40e9efd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.2. Start the Llama Stack Server"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f779283d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Starting Llama Stack server with PID: 787100\n",
|
||||
"Waiting for server to start\n",
|
||||
"Server is ready!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"server_process = run_llama_stack_server_background()\n",
|
||||
"assert wait_for_server_to_start()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28477c03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 3: Run the demo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7da71011",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
|
||||
"prompt> How do you do great work?\n",
|
||||
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
|
||||
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
|
||||
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
|
||||
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
|
||||
"\u001b[30m\u001b[0m"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
|
||||
"\n",
|
||||
"vector_db_id = \"my_demo_vector_db\"\n",
|
||||
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
|
||||
"\n",
|
||||
"models = client.models.list()\n",
|
||||
"\n",
|
||||
"# Select the first ollama and first ollama's embedding model\n",
|
||||
"model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
|
||||
"embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
|
||||
"embedding_model_id = embedding_model.identifier\n",
|
||||
"embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
|
||||
"\n",
|
||||
"_ = client.vector_dbs.register(\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" embedding_model=embedding_model_id,\n",
|
||||
" embedding_dimension=embedding_dimension,\n",
|
||||
" provider_id=\"faiss\",\n",
|
||||
")\n",
|
||||
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
|
||||
"print(\"rag_tool> Ingesting document:\", source)\n",
|
||||
"document = RAGDocument(\n",
|
||||
" document_id=\"document_1\",\n",
|
||||
" content=source,\n",
|
||||
" mime_type=\"text/html\",\n",
|
||||
" metadata={},\n",
|
||||
")\n",
|
||||
"client.tool_runtime.rag_tool.insert(\n",
|
||||
" documents=[document],\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" chunk_size_in_tokens=50,\n",
|
||||
")\n",
|
||||
"agent = Agent(\n",
|
||||
" client,\n",
|
||||
" model=model_id,\n",
|
||||
" instructions=\"You are a helpful assistant\",\n",
|
||||
" tools=[\n",
|
||||
" {\n",
|
||||
" \"name\": \"builtin::rag/knowledge_search\",\n",
|
||||
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"prompt = \"How do you do great work?\"\n",
|
||||
"print(\"prompt>\", prompt)\n",
|
||||
"\n",
|
||||
"response = agent.create_turn(\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
|
||||
" session_id=agent.create_session(\"rag_session\"),\n",
|
||||
" stream=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for log in AgentEventLogger().log(response):\n",
|
||||
" log.print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "341aaadf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e88e1185",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next Steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bcb73600",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you're ready to dive deeper into Llama Stack!\n",
|
||||
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
|
||||
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
|
||||
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
|
||||
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
|
||||
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
|
||||
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
|
||||
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"gpuType": "T4",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1e7571c",
|
||||
"metadata": {
|
||||
"id": "c1e7571c"
|
||||
},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
|
||||
"\n",
|
||||
"# Llama Stack - Building AI Applications\n",
|
||||
"\n",
|
||||
"<img src=\"https://llamastack.github.io/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
|
||||
"\n",
|
||||
"Get started with Llama Stack in minutes!\n",
|
||||
"\n",
|
||||
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
|
||||
"\n",
|
||||
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
|
||||
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4CV1Q19BDMVw",
|
||||
"metadata": {
|
||||
"id": "4CV1Q19BDMVw"
|
||||
},
|
||||
"source": [
|
||||
"## Step 1: Install and setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "K4AvfUAJZOeS",
|
||||
"metadata": {
|
||||
"id": "K4AvfUAJZOeS"
|
||||
},
|
||||
"source": [
|
||||
"### 1.1. Install uv and test inference with Ollama\n",
|
||||
"\n",
|
||||
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a2d7b85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install uv llama_stack llama-stack-client\n",
|
||||
"\n",
|
||||
"## If running on Collab:\n",
|
||||
"# !pip install colab-xterm\n",
|
||||
"# %load_ext colabxterm\n",
|
||||
"\n",
|
||||
"!curl https://ollama.ai/install.sh | sh"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39fa584b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1.2. Test inference with Ollama"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3bf81522",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a7e8e0f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## If running on Colab:\n",
|
||||
"# %xterm\n",
|
||||
"\n",
|
||||
"## To be ran in the terminal:\n",
|
||||
"# ollama serve &\n",
|
||||
"# ollama run llama3.2:3b --keepalive 60m"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f3c5f243",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If successful, you should see the model respond to a prompt.\n",
|
||||
"\n",
|
||||
"...\n",
|
||||
"```\n",
|
||||
">>> hi\n",
|
||||
"Hello! How can I assist you today?\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "oDUB7M_qe-Gs",
|
||||
"metadata": {
|
||||
"id": "oDUB7M_qe-Gs"
|
||||
},
|
||||
"source": [
|
||||
"## Step 2: Run the Llama Stack server\n",
|
||||
"\n",
|
||||
"In this showcase, we will start a Llama Stack server that is running locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "732eadc6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.1. Setup the Llama Stack Server"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"collapsed": true,
|
||||
"id": "J2kGed0R5PSf",
|
||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
||||
"!uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
" text=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
||||
" return process\n",
|
||||
"\n",
|
||||
"def wait_for_server_to_start():\n",
|
||||
" import requests\n",
|
||||
" from requests.exceptions import ConnectionError\n",
|
||||
" import time\n",
|
||||
"\n",
|
||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
||||
" max_retries = 30\n",
|
||||
" retry_interval = 1\n",
|
||||
"\n",
|
||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
||||
" for _ in range(max_retries):\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" print(\"\\nServer is ready!\")\n",
|
||||
" return True\n",
|
||||
" except ConnectionError:\n",
|
||||
" print(\".\", end=\"\", flush=True)\n",
|
||||
" time.sleep(retry_interval)\n",
|
||||
"\n",
|
||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# use this helper if needed to kill the server\n",
|
||||
"def kill_llama_stack_server():\n",
|
||||
" # Kill any existing llama stack server processes\n",
|
||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c40e9efd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.2. Start the Llama Stack Server"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f779283d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Starting Llama Stack server with PID: 787100\n",
|
||||
"Waiting for server to start\n",
|
||||
"Server is ready!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"server_process = run_llama_stack_server_background()\n",
|
||||
"assert wait_for_server_to_start()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28477c03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 3: Run the demo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7da71011",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
|
||||
"prompt> How do you do great work?\n",
|
||||
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
|
||||
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
|
||||
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
|
||||
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
|
||||
"\u001b[30m\u001b[0m"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
|
||||
"\n",
|
||||
"vector_db_id = \"my_demo_vector_db\"\n",
|
||||
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
|
||||
"\n",
|
||||
"models = client.models.list()\n",
|
||||
"\n",
|
||||
"# Select the first ollama and first ollama's embedding model\n",
|
||||
"model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
|
||||
"embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
|
||||
"embedding_model_id = embedding_model.identifier\n",
|
||||
"embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
|
||||
"\n",
|
||||
"_ = client.vector_dbs.register(\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" embedding_model=embedding_model_id,\n",
|
||||
" embedding_dimension=embedding_dimension,\n",
|
||||
" provider_id=\"faiss\",\n",
|
||||
")\n",
|
||||
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
|
||||
"print(\"rag_tool> Ingesting document:\", source)\n",
|
||||
"document = RAGDocument(\n",
|
||||
" document_id=\"document_1\",\n",
|
||||
" content=source,\n",
|
||||
" mime_type=\"text/html\",\n",
|
||||
" metadata={},\n",
|
||||
")\n",
|
||||
"client.tool_runtime.rag_tool.insert(\n",
|
||||
" documents=[document],\n",
|
||||
" vector_db_id=vector_db_id,\n",
|
||||
" chunk_size_in_tokens=50,\n",
|
||||
")\n",
|
||||
"agent = Agent(\n",
|
||||
" client,\n",
|
||||
" model=model_id,\n",
|
||||
" instructions=\"You are a helpful assistant\",\n",
|
||||
" tools=[\n",
|
||||
" {\n",
|
||||
" \"name\": \"builtin::rag/knowledge_search\",\n",
|
||||
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"prompt = \"How do you do great work?\"\n",
|
||||
"print(\"prompt>\", prompt)\n",
|
||||
"\n",
|
||||
"response = agent.create_turn(\n",
|
||||
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
|
||||
" session_id=agent.create_session(\"rag_session\"),\n",
|
||||
" stream=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for log in AgentEventLogger().log(response):\n",
|
||||
" log.print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "341aaadf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e88e1185",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next Steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bcb73600",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you're ready to dive deeper into Llama Stack!\n",
|
||||
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
|
||||
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
|
||||
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
|
||||
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
|
||||
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
|
||||
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
|
||||
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"gpuType": "T4",
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,11 +47,11 @@ function QuickStart() {
|
|||
<pre><code>{`# Install uv and start Ollama
|
||||
ollama run llama3.2:3b --keepalive 60m
|
||||
|
||||
# Install server dependencies
|
||||
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
|
||||
|
||||
# Run Llama Stack server
|
||||
OLLAMA_URL=http://localhost:11434 \\
|
||||
uv run --with llama-stack \\
|
||||
llama stack build --distro starter \\
|
||||
--image-type venv --run
|
||||
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
|
||||
|
||||
# Try the Python SDK
|
||||
from llama_stack_client import LlamaStackClient
|
||||
|
|
|
|||
4
docs/static/deprecated-llama-stack-spec.html
vendored
|
|
@ -13449,8 +13449,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
4
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -10196,9 +10196,9 @@ tags:
|
|||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
|
|
@ -5518,8 +5518,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining (Coming Soon)",
|
||||
|
|
|
|||
|
|
@ -4119,9 +4119,9 @@ tags:
|
|||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: PostTraining (Coming Soon)
|
||||
description: ''
|
||||
x-tagGroups:
|
||||
|
|
|
|||
34
docs/static/llama-stack-spec.html
vendored
|
|
@ -282,7 +282,7 @@
|
|||
"Conversations"
|
||||
],
|
||||
"summary": "Create a conversation.",
|
||||
"description": "Create a conversation.",
|
||||
"description": "Create a conversation.\nCreate a conversation.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -326,8 +326,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Get a conversation with the given ID.",
|
||||
"description": "Get a conversation with the given ID.",
|
||||
"summary": "Retrieve a conversation.",
|
||||
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -369,8 +369,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Update a conversation's metadata with the given ID.",
|
||||
"description": "Update a conversation's metadata with the given ID.",
|
||||
"summary": "Update a conversation.",
|
||||
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -422,8 +422,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation with the given ID.",
|
||||
"description": "Delete a conversation with the given ID.",
|
||||
"summary": "Delete a conversation.",
|
||||
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -467,8 +467,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "List items in the conversation.",
|
||||
"description": "List items in the conversation.",
|
||||
"summary": "List items.",
|
||||
"description": "List items.\nList items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -597,8 +597,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Create items in the conversation.",
|
||||
"description": "Create items in the conversation.",
|
||||
"summary": "Create items.",
|
||||
"description": "Create items.\nCreate items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -652,8 +652,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Retrieve a conversation item.",
|
||||
"description": "Retrieve a conversation item.",
|
||||
"summary": "Retrieve an item.",
|
||||
"description": "Retrieve an item.\nRetrieve a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -704,8 +704,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation item.",
|
||||
"description": "Delete a conversation item.",
|
||||
"summary": "Delete an item.",
|
||||
"description": "Delete an item.\nDelete a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -13251,8 +13251,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Conversations",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for conversation management operations."
|
||||
"description": "Protocol for conversation management operations.",
|
||||
"x-displayName": "Conversations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
56
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -192,7 +192,10 @@ paths:
|
|||
tags:
|
||||
- Conversations
|
||||
summary: Create a conversation.
|
||||
description: Create a conversation.
|
||||
description: >-
|
||||
Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -222,8 +225,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Get a conversation with the given ID.
|
||||
description: Get a conversation with the given ID.
|
||||
summary: Retrieve a conversation.
|
||||
description: >-
|
||||
Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -252,9 +258,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: >-
|
||||
Update a conversation's metadata with the given ID.
|
||||
summary: Update a conversation.
|
||||
description: >-
|
||||
Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
|
|
@ -290,8 +297,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation with the given ID.
|
||||
description: Delete a conversation with the given ID.
|
||||
summary: Delete a conversation.
|
||||
description: >-
|
||||
Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -321,8 +331,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: List items in the conversation.
|
||||
description: List items in the conversation.
|
||||
summary: List items.
|
||||
description: >-
|
||||
List items.
|
||||
|
||||
List items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -495,8 +508,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Create items in the conversation.
|
||||
description: Create items in the conversation.
|
||||
summary: Create items.
|
||||
description: >-
|
||||
Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -532,8 +548,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Retrieve a conversation item.
|
||||
description: Retrieve a conversation item.
|
||||
summary: Retrieve an item.
|
||||
description: >-
|
||||
Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -568,8 +587,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation item.
|
||||
description: Delete a conversation item.
|
||||
summary: Delete an item.
|
||||
description: >-
|
||||
Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -10146,9 +10168,9 @@ tags:
|
|||
- `background`
|
||||
x-displayName: Agents
|
||||
- name: Conversations
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for conversation management operations.
|
||||
x-displayName: Conversations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
38
docs/static/stainless-llama-stack-spec.html
vendored
|
|
@ -282,7 +282,7 @@
|
|||
"Conversations"
|
||||
],
|
||||
"summary": "Create a conversation.",
|
||||
"description": "Create a conversation.",
|
||||
"description": "Create a conversation.\nCreate a conversation.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
|
@ -326,8 +326,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Get a conversation with the given ID.",
|
||||
"description": "Get a conversation with the given ID.",
|
||||
"summary": "Retrieve a conversation.",
|
||||
"description": "Retrieve a conversation.\nGet a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -369,8 +369,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Update a conversation's metadata with the given ID.",
|
||||
"description": "Update a conversation's metadata with the given ID.",
|
||||
"summary": "Update a conversation.",
|
||||
"description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -422,8 +422,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation with the given ID.",
|
||||
"description": "Delete a conversation with the given ID.",
|
||||
"summary": "Delete a conversation.",
|
||||
"description": "Delete a conversation.\nDelete a conversation with the given ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -467,8 +467,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "List items in the conversation.",
|
||||
"description": "List items in the conversation.",
|
||||
"summary": "List items.",
|
||||
"description": "List items.\nList items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -597,8 +597,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Create items in the conversation.",
|
||||
"description": "Create items in the conversation.",
|
||||
"summary": "Create items.",
|
||||
"description": "Create items.\nCreate items in the conversation.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -652,8 +652,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Retrieve a conversation item.",
|
||||
"description": "Retrieve a conversation item.",
|
||||
"summary": "Retrieve an item.",
|
||||
"description": "Retrieve an item.\nRetrieve a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -704,8 +704,8 @@
|
|||
"tags": [
|
||||
"Conversations"
|
||||
],
|
||||
"summary": "Delete a conversation item.",
|
||||
"description": "Delete a conversation item.",
|
||||
"summary": "Delete an item.",
|
||||
"description": "Delete an item.\nDelete a conversation item.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "conversation_id",
|
||||
|
|
@ -17928,8 +17928,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Conversations",
|
||||
"description": "",
|
||||
"x-displayName": "Protocol for conversation management operations."
|
||||
"description": "Protocol for conversation management operations.",
|
||||
"x-displayName": "Conversations"
|
||||
},
|
||||
{
|
||||
"name": "DatasetIO",
|
||||
|
|
@ -17941,8 +17941,8 @@
|
|||
},
|
||||
{
|
||||
"name": "Eval",
|
||||
"description": "",
|
||||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
"description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
|
||||
"x-displayName": "Evaluations"
|
||||
},
|
||||
{
|
||||
"name": "Files",
|
||||
|
|
|
|||
60
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -195,7 +195,10 @@ paths:
|
|||
tags:
|
||||
- Conversations
|
||||
summary: Create a conversation.
|
||||
description: Create a conversation.
|
||||
description: >-
|
||||
Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
|
@ -225,8 +228,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Get a conversation with the given ID.
|
||||
description: Get a conversation with the given ID.
|
||||
summary: Retrieve a conversation.
|
||||
description: >-
|
||||
Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -255,9 +261,10 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: >-
|
||||
Update a conversation's metadata with the given ID.
|
||||
summary: Update a conversation.
|
||||
description: >-
|
||||
Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
|
|
@ -293,8 +300,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation with the given ID.
|
||||
description: Delete a conversation with the given ID.
|
||||
summary: Delete a conversation.
|
||||
description: >-
|
||||
Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -324,8 +334,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: List items in the conversation.
|
||||
description: List items in the conversation.
|
||||
summary: List items.
|
||||
description: >-
|
||||
List items.
|
||||
|
||||
List items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -498,8 +511,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Create items in the conversation.
|
||||
description: Create items in the conversation.
|
||||
summary: Create items.
|
||||
description: >-
|
||||
Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -535,8 +551,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Retrieve a conversation item.
|
||||
description: Retrieve a conversation item.
|
||||
summary: Retrieve an item.
|
||||
description: >-
|
||||
Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -571,8 +590,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Conversations
|
||||
summary: Delete a conversation item.
|
||||
description: Delete a conversation item.
|
||||
summary: Delete an item.
|
||||
description: >-
|
||||
Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
parameters:
|
||||
- name: conversation_id
|
||||
in: path
|
||||
|
|
@ -13533,17 +13555,17 @@ tags:
|
|||
- name: Benchmarks
|
||||
description: ''
|
||||
- name: Conversations
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Protocol for conversation management operations.
|
||||
x-displayName: Conversations
|
||||
- name: DatasetIO
|
||||
description: ''
|
||||
- name: Datasets
|
||||
description: ''
|
||||
- name: Eval
|
||||
description: ''
|
||||
x-displayName: >-
|
||||
description: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
x-displayName: Evaluations
|
||||
- name: Files
|
||||
description: >-
|
||||
This API is used to upload documents that can be used with other Llama Stack
|
||||
|
|
|
|||
|
|
@ -78,17 +78,14 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
|
||||
## Build, Configure, and Run Llama Stack
|
||||
|
||||
1. **Build the Llama Stack**:
|
||||
Build the Llama Stack using the `starter` template:
|
||||
1. **Install dependencies**:
|
||||
```bash
|
||||
uv run --with llama-stack llama stack build --distro starter --image-type venv
|
||||
llama stack list-deps starter | xargs -L1 uv pip install
|
||||
```
|
||||
**Expected Output:**
|
||||
|
||||
2. **Start the distribution**:
|
||||
```bash
|
||||
...
|
||||
Build Successful!
|
||||
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
|
||||
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
|
||||
llama stack run starter
|
||||
```
|
||||
|
||||
3. **Set the ENV variables by exporting them to the terminal**:
|
||||
|
|
|
|||
|
|
@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
|
|||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Conversations(Protocol):
|
||||
"""Protocol for conversation management operations."""
|
||||
"""Conversations
|
||||
|
||||
Protocol for conversation management operations."""
|
||||
|
||||
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def create_conversation(
|
||||
|
|
@ -181,6 +183,8 @@ class Conversations(Protocol):
|
|||
) -> Conversation:
|
||||
"""Create a conversation.
|
||||
|
||||
Create a conversation.
|
||||
|
||||
:param items: Initial items to include in the conversation context.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
:returns: The created conversation object.
|
||||
|
|
@ -189,7 +193,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def get_conversation(self, conversation_id: str) -> Conversation:
|
||||
"""Get a conversation with the given ID.
|
||||
"""Retrieve a conversation.
|
||||
|
||||
Get a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The conversation object.
|
||||
|
|
@ -198,7 +204,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
|
||||
"""Update a conversation's metadata with the given ID.
|
||||
"""Update a conversation.
|
||||
|
||||
Update a conversation's metadata with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param metadata: Set of key-value pairs that can be attached to an object.
|
||||
|
|
@ -208,7 +216,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
|
||||
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
|
||||
"""Delete a conversation with the given ID.
|
||||
"""Delete a conversation.
|
||||
|
||||
Delete a conversation with the given ID.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:returns: The deleted conversation resource.
|
||||
|
|
@ -217,7 +227,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
|
||||
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
|
||||
"""Create items in the conversation.
|
||||
"""Create items.
|
||||
|
||||
Create items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param items: Items to include in the conversation context.
|
||||
|
|
@ -227,7 +239,9 @@ class Conversations(Protocol):
|
|||
|
||||
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
|
||||
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
|
||||
"""Retrieve a conversation item.
|
||||
"""Retrieve an item.
|
||||
|
||||
Retrieve a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
|
||||
|
|
@ -244,7 +258,9 @@ class Conversations(Protocol):
|
|||
limit: int | NotGiven = NOT_GIVEN,
|
||||
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
|
||||
) -> ConversationItemList:
|
||||
"""List items in the conversation.
|
||||
"""List items.
|
||||
|
||||
List items in the conversation.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param after: An item ID to list items after, used in pagination.
|
||||
|
|
@ -259,7 +275,9 @@ class Conversations(Protocol):
|
|||
async def openai_delete_conversation_item(
|
||||
self, conversation_id: str, item_id: str
|
||||
) -> ConversationItemDeletedResource:
|
||||
"""Delete a conversation item.
|
||||
"""Delete an item.
|
||||
|
||||
Delete a conversation item.
|
||||
|
||||
:param conversation_id: The conversation identifier.
|
||||
:param item_id: The item identifier.
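For orientation, a minimal sketch (not part of this diff) of exercising the Conversations routes documented above over plain HTTP, assuming a Llama Stack server on localhost:8321 and the v1 API prefix; the payload field names and the `id` field on the returned conversation are illustrative assumptions.

```python
import httpx  # assumed available; any HTTP client works

BASE = "http://localhost:8321/v1"  # assumed server address and API prefix

with httpx.Client(base_url=BASE) as client:
    # Create a conversation (POST /conversations)
    conv = client.post("/conversations", json={"metadata": {"topic": "demo"}}).json()
    conv_id = conv["id"]  # field name assumed

    # Retrieve and update it (GET / POST /conversations/{conversation_id})
    client.get(f"/conversations/{conv_id}")
    client.post(f"/conversations/{conv_id}", json={"metadata": {"topic": "updated"}})

    # List its items, then delete the conversation
    client.get(f"/conversations/{conv_id}/items")
    client.delete(f"/conversations/{conv_id}")
```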
|
||||
|
|
|
|||
|
|
@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):
|
|||
|
||||
|
||||
class Eval(Protocol):
|
||||
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
"""Evaluations
|
||||
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
|
||||
|
|
|
|||
182  llama_stack/cli/stack/_list_deps.py  Normal file
|
|
@ -0,0 +1,182 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.core.build import get_provider_dependencies
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
BuildProvider,
|
||||
DistributionSpec,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.stack import replace_env_vars
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
|
||||
|
||||
logger = get_logger(name=__name__, category="cli")
|
||||
|
||||
|
||||
# These are the dependencies needed by the distribution server.
|
||||
# `llama-stack` is automatically installed by the installation script.
|
||||
SERVER_DEPENDENCIES = [
|
||||
"aiosqlite",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"uvicorn",
|
||||
"opentelemetry-sdk",
|
||||
"opentelemetry-exporter-otlp-proto-http",
|
||||
]
|
||||
|
||||
|
||||
def format_output_deps_only(
|
||||
normal_deps: list[str],
|
||||
special_deps: list[str],
|
||||
external_deps: list[str],
|
||||
uv: bool = False,
|
||||
) -> str:
|
||||
"""Format dependencies as a list."""
|
||||
lines = []
|
||||
|
||||
uv_str = ""
|
||||
if uv:
|
||||
uv_str = "uv pip install "
|
||||
|
||||
# Quote deps with commas
|
||||
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
|
||||
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
|
||||
|
||||
for special_dep in special_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
|
||||
|
||||
for external_dep in external_deps:
|
||||
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
|
||||
if args.config:
|
||||
try:
|
||||
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
|
||||
|
||||
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
|
||||
except ValueError as e:
|
||||
cprint(
|
||||
f"Could not parse config file {args.config}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if config_file:
|
||||
with open(config_file) as f:
|
||||
try:
|
||||
contents = yaml.safe_load(f)
|
||||
contents = replace_env_vars(contents)
|
||||
build_config = BuildConfig(**contents)
|
||||
build_config.image_type = "venv"
|
||||
except Exception as e:
|
||||
cprint(
|
||||
f"Could not parse config file {config_file}: {e}",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.providers:
|
||||
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||
for api_provider in args.providers.split(","):
|
||||
if "=" not in api_provider:
|
||||
cprint(
|
||||
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
api, provider_type = api_provider.split("=")
|
||||
providers_for_api = get_provider_registry().get(Api(api), None)
|
||||
if providers_for_api is None:
|
||||
cprint(
|
||||
f"{api} is not a valid API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
if provider_type in providers_for_api:
|
||||
provider = BuildProvider(
|
||||
provider_type=provider_type,
|
||||
module=None,
|
||||
)
|
||||
provider_list.setdefault(api, []).append(provider)
|
||||
else:
|
||||
cprint(
|
||||
f"{provider_type} is not a valid provider for the {api} API.",
|
||||
color="red",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
distribution_spec = DistributionSpec(
|
||||
providers=provider_list,
|
||||
description=",".join(args.providers),
|
||||
)
|
||||
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
|
||||
|
||||
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
|
||||
normal_deps += SERVER_DEPENDENCIES
|
||||
|
||||
# Add external API dependencies
|
||||
if build_config.external_apis_dir:
|
||||
from llama_stack.core.external import load_external_apis
|
||||
|
||||
external_apis = load_external_apis(build_config)
|
||||
if external_apis:
|
||||
for _, api_spec in external_apis.items():
|
||||
normal_deps.extend(api_spec.pip_packages)
|
||||
|
||||
# Format and output based on requested format
|
||||
output = format_output_deps_only(
|
||||
normal_deps=normal_deps,
|
||||
special_deps=special_deps,
|
||||
external_deps=external_provider_dependencies,
|
||||
uv=args.format == "uv",
|
||||
)
|
||||
|
||||
print(output)
|
||||
|
||||
|
||||
def quote_if_needed(dep):
|
||||
# Add quotes if the dependency contains special characters that need escaping in shell
|
||||
# This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
|
||||
needs_quoting = any(char in dep for char in [",", "<", ">", "="])
|
||||
return f"'{dep}'" if needs_quoting else dep
|
||||
|
||||
|
||||
def quote_special_dep(dep_string):
|
||||
"""
|
||||
Quote individual packages in a special dependency string.
|
||||
Special deps may contain multiple packages and flags like --extra-index-url.
|
||||
We need to quote only the package specs that contain special characters.
|
||||
"""
|
||||
parts = dep_string.split()
|
||||
quoted_parts = []
|
||||
|
||||
for part in parts:
|
||||
# Don't quote flags (they start with -)
|
||||
if part.startswith("-"):
|
||||
quoted_parts.append(part)
|
||||
else:
|
||||
# Quote package specs that need it
|
||||
quoted_parts.append(quote_if_needed(part))
|
||||
|
||||
return " ".join(quoted_parts)
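A small usage sketch of the helpers defined above; the import assumes the new module is importable once this change is installed.

```python
from llama_stack.cli.stack._list_deps import format_output_deps_only, quote_if_needed

print(quote_if_needed("fastapi"))        # -> fastapi (no special characters, left unquoted)
print(quote_if_needed("torch>=2.3,<3"))  # -> 'torch>=2.3,<3' (quoted for the shell)

print(
    format_output_deps_only(
        normal_deps=["fastapi", "uvicorn"],
        special_deps=["torch --extra-index-url https://download.pytorch.org/whl/cpu"],
        external_deps=[],
        uv=True,
    )
)
# uv pip install fastapi uvicorn
# uv pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
```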
|
||||
|
|
@ -8,6 +8,9 @@ import textwrap
|
|||
|
||||
from llama_stack.cli.stack.utils import ImageType
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, category="cli")
|
||||
|
||||
|
||||
class StackBuild(Subcommand):
|
||||
|
|
@ -16,7 +19,7 @@ class StackBuild(Subcommand):
|
|||
self.parser = subparsers.add_parser(
|
||||
"build",
|
||||
prog="llama stack build",
|
||||
description="Build a Llama stack container",
|
||||
description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
|
|
@ -93,6 +96,9 @@ the build. If not specified, currently active environment will be used if found.
|
|||
)
|
||||
|
||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||
logger.warning(
|
||||
"The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
|
||||
)
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._build import run_stack_build_command
|
||||
|
|
|
|||
51  llama_stack/cli/stack/list_deps.py  Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import argparse
|
||||
|
||||
from llama_stack.cli.subcommand import Subcommand
|
||||
|
||||
|
||||
class StackListDeps(Subcommand):
|
||||
def __init__(self, subparsers: argparse._SubParsersAction):
|
||||
super().__init__()
|
||||
self.parser = subparsers.add_parser(
|
||||
"list-deps",
|
||||
prog="llama stack list-deps",
|
||||
description="list the dependencies for a llama stack distribution",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
self._add_arguments()
|
||||
self.parser.set_defaults(func=self._run_stack_list_deps_command)
|
||||
|
||||
def _add_arguments(self):
|
||||
self.parser.add_argument(
|
||||
"config",
|
||||
type=str,
|
||||
nargs="?", # Make it optional
|
||||
metavar="config | distro",
|
||||
help="Path to config file to use or name of known distro (llama stack list for a list).",
|
||||
)
|
||||
|
||||
self.parser.add_argument(
|
||||
"--providers",
|
||||
type=str,
|
||||
default=None,
|
||||
help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
|
||||
)
|
||||
self.parser.add_argument(
|
||||
"--format",
|
||||
type=str,
|
||||
choices=["uv", "deps-only"],
|
||||
default="deps-only",
|
||||
help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
|
||||
)
|
||||
|
||||
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
|
||||
# always keep implementation completely silo-ed away from CLI so CLI
|
||||
# can be fast to load and reduces dependencies
|
||||
from ._list_deps import run_stack_list_deps_command
|
||||
|
||||
return run_stack_list_deps_command(args)
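A hedged sketch of driving the new subcommand programmatically, equivalent to running `llama stack list-deps starter --format uv` in a shell; it assumes the `llama` CLI is installed and the `starter` distribution is known.

```python
import subprocess

result = subprocess.run(
    ["llama", "stack", "list-deps", "starter", "--format", "uv"],
    capture_output=True,
    text=True,
    check=True,
)
for line in result.stdout.splitlines():
    print(line)  # each line is a `uv pip install ...` command
```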
|
||||
|
|
@ -13,6 +13,7 @@ from llama_stack.cli.subcommand import Subcommand
|
|||
|
||||
from .build import StackBuild
|
||||
from .list_apis import StackListApis
|
||||
from .list_deps import StackListDeps
|
||||
from .list_providers import StackListProviders
|
||||
from .remove import StackRemove
|
||||
from .run import StackRun
|
||||
|
|
@ -39,6 +40,7 @@ class StackParser(Subcommand):
|
|||
subparsers = self.parser.add_subparsers(title="stack_subcommands")
|
||||
|
||||
# Add sub-commands
|
||||
StackListDeps.create(subparsers)
|
||||
StackBuild.create(subparsers)
|
||||
StackListApis.create(subparsers)
|
||||
StackListProviders.create(subparsers)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,28 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import json
|
||||
import sys
|
||||
from enum import Enum
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BuildConfig,
|
||||
Provider,
|
||||
StackRunConfig,
|
||||
)
|
||||
from llama_stack.core.distribution import get_provider_registry
|
||||
from llama_stack.core.resolver import InvalidProviderError
|
||||
from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
|
||||
from llama_stack.core.utils.dynamic import instantiate_class_type
|
||||
from llama_stack.core.utils.image_types import LlamaStackImageType
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
|
||||
|
||||
|
||||
class ImageType(Enum):
|
||||
|
|
@ -19,3 +40,91 @@ def print_subcommand_description(parser, subparsers):
|
|||
description = subcommand.description
|
||||
description_text += f" {name:<21} {description}\n"
|
||||
parser.epilog = description_text
|
||||
|
||||
|
||||
def generate_run_config(
|
||||
build_config: BuildConfig,
|
||||
build_dir: Path,
|
||||
image_name: str,
|
||||
) -> Path:
|
||||
"""
|
||||
Generate a run.yaml template file for user to edit from a build.yaml file
|
||||
"""
|
||||
apis = list(build_config.distribution_spec.providers.keys())
|
||||
run_config = StackRunConfig(
|
||||
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
|
||||
image_name=image_name,
|
||||
apis=apis,
|
||||
providers={},
|
||||
external_providers_dir=build_config.external_providers_dir
|
||||
if build_config.external_providers_dir
|
||||
else EXTERNAL_PROVIDERS_DIR,
|
||||
)
|
||||
# build providers dict
|
||||
provider_registry = get_provider_registry(build_config)
|
||||
for api in apis:
|
||||
run_config.providers[api] = []
|
||||
providers = build_config.distribution_spec.providers[api]
|
||||
|
||||
for provider in providers:
|
||||
pid = provider.provider_type.split("::")[-1]
|
||||
|
||||
p = provider_registry[Api(api)][provider.provider_type]
|
||||
if p.deprecation_error:
|
||||
raise InvalidProviderError(p.deprecation_error)
|
||||
|
||||
try:
|
||||
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
|
||||
except (ModuleNotFoundError, ValueError) as exc:
|
||||
# HACK ALERT:
|
||||
# This code executes after building is done, the import cannot work since the
|
||||
# package is either available in the venv or container - not available on the host.
|
||||
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
|
||||
# external
|
||||
cprint(
|
||||
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
|
||||
color="yellow",
|
||||
file=sys.stderr,
|
||||
)
|
||||
# Set config_type to None to avoid UnboundLocalError
|
||||
config_type = None
|
||||
|
||||
if config_type is not None and hasattr(config_type, "sample_run_config"):
|
||||
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
|
||||
else:
|
||||
config = {}
|
||||
|
||||
p_spec = Provider(
|
||||
provider_id=pid,
|
||||
provider_type=provider.provider_type,
|
||||
config=config,
|
||||
module=provider.module,
|
||||
)
|
||||
run_config.providers[api].append(p_spec)
|
||||
|
||||
run_config_file = build_dir / f"{image_name}-run.yaml"
|
||||
|
||||
with open(run_config_file, "w") as f:
|
||||
to_write = json.loads(run_config.model_dump_json())
|
||||
f.write(yaml.dump(to_write, sort_keys=False))
|
||||
|
||||
# Only print this message for non-container builds since it will be displayed before the
|
||||
# container is built
|
||||
# For non-container builds, the run.yaml is generated at the very end of the build process so it
|
||||
# makes sense to display this message
|
||||
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
|
||||
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
|
||||
return run_config_file
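A hedged sketch of calling the helper above; `build_config` is assumed to be a `BuildConfig` parsed from a distribution's build.yaml (for example via `available_templates_specs()`), and the build directory is a hypothetical location.

```python
from pathlib import Path

build_dir = Path.home() / ".llama" / "distributions" / "my-distro"  # hypothetical
build_dir.mkdir(parents=True, exist_ok=True)

# Writes my-distro-run.yaml into build_dir and returns its path
run_config_file = generate_run_config(build_config, build_dir, image_name="my-distro")
print(f"Run config written to {run_config_file}")
```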
|
||||
|
||||
|
||||
@lru_cache
|
||||
def available_templates_specs() -> dict[str, BuildConfig]:
|
||||
import yaml
|
||||
|
||||
template_specs = {}
|
||||
for p in TEMPLATES_PATH.rglob("*build.yaml"):
|
||||
template_name = p.parent.name
|
||||
with open(p) as f:
|
||||
build_config = BuildConfig(**yaml.safe_load(f))
|
||||
template_specs[template_name] = build_config
|
||||
return template_specs
|
||||
|
|
|
|||
|
|
@ -338,7 +338,7 @@ fi
|
|||
# Add other required commands generic to all containers
|
||||
add_to_container << EOF
|
||||
|
||||
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
|
||||
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
|
||||
EOF
|
||||
|
||||
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import importlib
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
from typing import Any
|
||||
|
||||
|
|
|
|||
|
|
@ -42,3 +42,8 @@ def sync_test_context_from_provider_data():
|
|||
return TEST_CONTEXT.set(provider_data["__test_id"])
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def is_debug_mode() -> bool:
|
||||
"""Check if test recording debug mode is enabled via LLAMA_STACK_TEST_DEBUG env var."""
|
||||
return os.environ.get("LLAMA_STACK_TEST_DEBUG", "").lower() in ("1", "true", "yes")
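A tiny sketch (assumption: executed inside the test process before any recordings are touched) showing how the new flag is toggled and queried.

```python
import os

from llama_stack.core.testing_context import is_debug_mode

os.environ["LLAMA_STACK_TEST_DEBUG"] = "1"
assert is_debug_mode()  # the api_recorder debug logging below keys off this flag
```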
|
||||
|
|
|
|||
|
|
@ -42,25 +42,25 @@ def resolve_config_or_distro(
|
|||
# Strategy 1: Try as file path first
|
||||
config_path = Path(config_or_distro)
|
||||
if config_path.exists() and config_path.is_file():
|
||||
logger.info(f"Using file path: {config_path}")
|
||||
logger.debug(f"Using file path: {config_path}")
|
||||
return config_path.resolve()
|
||||
|
||||
# Strategy 2: Try as distribution name (if no .yaml extension)
|
||||
if not config_or_distro.endswith(".yaml"):
|
||||
distro_config = _get_distro_config_path(config_or_distro, mode)
|
||||
if distro_config.exists():
|
||||
logger.info(f"Using distribution: {distro_config}")
|
||||
logger.debug(f"Using distribution: {distro_config}")
|
||||
return distro_config
|
||||
|
||||
# Strategy 3: Try as built distribution name
|
||||
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
|
||||
if distrib_config.exists():
|
||||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
logger.debug(f"Using built distribution: {distrib_config}")
|
||||
return distrib_config
|
||||
|
||||
distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
|
||||
if distrib_config.exists():
|
||||
logger.info(f"Using built distribution: {distrib_config}")
|
||||
logger.debug(f"Using built distribution: {distrib_config}")
|
||||
return distrib_config
|
||||
|
||||
# Strategy 4: Failed - provide helpful error
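A hedged usage sketch of the resolver whose log level is lowered above; `Mode.BUILD` is taken from its use elsewhere in this change, and other modes may exist.

```python
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro

# Tries, in order: literal file path, packaged distribution, built distribution dirs.
config_path = resolve_config_or_distro("starter", Mode.BUILD)
print(config_path)
```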
|
||||
|
|
|
|||
|
|
@ -70,10 +70,10 @@ docker run \
|
|||
|
||||
### Via venv
|
||||
|
||||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
Make sure you have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
llama stack build --distro {{ name }} --image-type venv
|
||||
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
llama stack run distributions/{{ name }}/run.yaml \
|
||||
--port 8321
|
||||
|
|
|
|||
|
|
@ -126,11 +126,11 @@ docker run \
|
|||
|
||||
### Via venv
|
||||
|
||||
If you've set up your local development environment, you can also build the image using your local virtual environment.
|
||||
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
|
||||
llama stack build --distro nvidia --image-type venv
|
||||
llama stack list-deps nvidia | xargs -L1 uv pip install
|
||||
NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
INFERENCE_MODEL=$INFERENCE_MODEL \
|
||||
llama stack run ./run.yaml \
|
||||
|
|
|
|||
|
|
@ -79,7 +79,6 @@ class TelemetryAdapter(Telemetry):
|
|||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
self.meter = metrics.get_meter(__name__)
|
||||
|
||||
self._lock = _global_lock
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ The following example shows how to create a chat completion for an NVIDIA NIM.
|
|||
|
||||
```python
|
||||
response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
|
|
@ -67,37 +67,40 @@ print(f"Response: {response.choices[0].message.content}")
|
|||
The following example shows how to do tool calling for an NVIDIA NIM.
|
||||
|
||||
```python
|
||||
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
|
||||
|
||||
tool_definition = ToolDefinition(
|
||||
tool_name="get_weather",
|
||||
description="Get current weather information for a location",
|
||||
parameters={
|
||||
"location": ToolParamDefinition(
|
||||
param_type="string",
|
||||
description="The city and state, e.g. San Francisco, CA",
|
||||
required=True,
|
||||
),
|
||||
"unit": ToolParamDefinition(
|
||||
param_type="string",
|
||||
description="Temperature unit (celsius or fahrenheit)",
|
||||
required=False,
|
||||
default="celsius",
|
||||
),
|
||||
tool_definition = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get current weather information for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "Temperature unit (celsius or fahrenheit)",
|
||||
"default": "celsius",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
tool_response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
|
||||
tools=[tool_definition],
|
||||
)
|
||||
|
||||
print(f"Tool Response: {tool_response.choices[0].message.content}")
|
||||
print(f"Response content: {tool_response.choices[0].message.content}")
|
||||
if tool_response.choices[0].message.tool_calls:
|
||||
for tool_call in tool_response.choices[0].message.tool_calls:
|
||||
print(f"Tool Called: {tool_call.tool_name}")
|
||||
print(f"Arguments: {tool_call.arguments}")
|
||||
print(f"Tool Called: {tool_call.function.name}")
|
||||
print(f"Arguments: {tool_call.function.arguments}")
|
||||
```
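A hedged continuation (not part of the docs change): when the model elects to call the tool, the arguments arrive as a JSON string and can be parsed before dispatching to a real implementation.

```python
import json

for tool_call in tool_response.choices[0].message.tool_calls or []:
    args = json.loads(tool_call.function.arguments)  # arguments arrive as a JSON string
    print(f"Would call {tool_call.function.name} with {args}")
```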
|
||||
|
||||
### Structured Output Example
|
||||
|
|
@ -105,33 +108,26 @@ if tool_response.choices[0].message.tool_calls:
|
|||
The following example shows how to do structured output for an NVIDIA NIM.
|
||||
|
||||
```python
|
||||
from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
|
||||
|
||||
person_schema = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {"type": "string"},
|
||||
"age": {"type": "integer"},
|
||||
"age": {"type": "number"},
|
||||
"occupation": {"type": "string"},
|
||||
},
|
||||
"required": ["name", "age", "occupation"],
|
||||
}
|
||||
|
||||
response_format = JsonSchemaResponseFormat(
|
||||
type=ResponseFormatType.json_schema, json_schema=person_schema
|
||||
)
|
||||
|
||||
structured_response = client.chat.completions.create(
|
||||
model="meta-llama/Llama-3.1-8B-Instruct",
|
||||
model="nvidia/meta/llama-3.1-8b-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. ",
|
||||
}
|
||||
],
|
||||
response_format=response_format,
|
||||
extra_body={"nvext": {"guided_json": person_schema}},
|
||||
)
|
||||
|
||||
print(f"Structured Response: {structured_response.choices[0].message.content}")
|
||||
```
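A short follow-on sketch (not in the docs diff): since `guided_json` constrains the output to `person_schema`, the message content can be parsed directly.

```python
import json

profile = json.loads(structured_response.choices[0].message.content)
print(profile["name"], profile["age"], profile["occupation"])
```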
|
||||
|
||||
|
|
@ -141,7 +137,7 @@ The following example shows how to create embeddings for an NVIDIA NIM.
|
|||
|
||||
```python
|
||||
response = client.embeddings.create(
|
||||
model="nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||
model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
|
||||
input=["What is the capital of France?"],
|
||||
extra_body={"input_type": "query"},
|
||||
)
|
||||
|
|
@ -163,15 +159,15 @@ image_path = {path_to_the_image}
|
|||
demo_image_b64 = load_image_as_base64(image_path)
|
||||
|
||||
vlm_response = client.chat.completions.create(
|
||||
model="nvidia/vila",
|
||||
model="nvidia/meta/llama-3.2-11b-vision-instruct",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"image": {
|
||||
"data": demo_image_b64,
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/png;base64,{demo_image_b64}",
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -19,15 +19,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
|
|||
|
||||
"""
|
||||
NVIDIA Inference Adapter for Llama Stack.
|
||||
|
||||
Note: The inheritance order is important here. OpenAIMixin must come before
|
||||
ModelRegistryHelper to ensure that OpenAIMixin.check_model_availability()
|
||||
is used instead of ModelRegistryHelper.check_model_availability(). It also
|
||||
must come before Inference to ensure that OpenAIMixin methods are available
|
||||
in the Inference interface.
|
||||
|
||||
- OpenAIMixin.check_model_availability() queries the NVIDIA API to check if a model exists
|
||||
- ModelRegistryHelper.check_model_availability() just returns False and shows a warning
|
||||
"""
|
||||
|
||||
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
|
|||
"__class__": class_name,
|
||||
"__method__": method_name,
|
||||
"__type__": span_type,
|
||||
"__args__": str(combined_args),
|
||||
"__args__": json.dumps(combined_args),
|
||||
}
|
||||
|
||||
return class_name, method_name, span_attributes
|
||||
|
|
@ -82,8 +82,8 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
|
|||
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
|
||||
|
||||
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
|
||||
count = 0
|
||||
try:
|
||||
count = 0
|
||||
async for item in method(self, *args, **kwargs):
|
||||
yield item
|
||||
count += 1
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ _id_counters: dict[str, dict[str, int]] = {}
|
|||
# Test context uses ContextVar since it changes per-test and needs async isolation
|
||||
from openai.types.completion_choice import CompletionChoice
|
||||
|
||||
from llama_stack.core.testing_context import get_test_context
|
||||
from llama_stack.core.testing_context import get_test_context, is_debug_mode
|
||||
|
||||
# update the "finish_reason" field, since its type definition is wrong (no None is accepted)
|
||||
CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
|
||||
|
|
@ -146,6 +146,7 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
|
|||
|
||||
body_for_hash = _normalize_body_for_hash(body)
|
||||
|
||||
test_id = get_test_context()
|
||||
normalized: dict[str, Any] = {
|
||||
"method": method.upper(),
|
||||
"endpoint": parsed.path,
|
||||
|
|
@ -154,10 +155,20 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
|
|||
|
||||
# Include test_id for isolation, except for shared infrastructure endpoints
|
||||
if parsed.path not in ("/api/tags", "/v1/models"):
|
||||
normalized["test_id"] = get_test_context()
|
||||
normalized["test_id"] = test_id
|
||||
|
||||
normalized_json = json.dumps(normalized, sort_keys=True)
|
||||
return hashlib.sha256(normalized_json.encode()).hexdigest()
|
||||
request_hash = hashlib.sha256(normalized_json.encode()).hexdigest()
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Hash computation:")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Method: {method.upper()}")
|
||||
logger.info(f" Endpoint: {parsed.path}")
|
||||
logger.info(f" Model: {body.get('model', 'N/A')}")
|
||||
logger.info(f" Computed hash: {request_hash}")
|
||||
|
||||
return request_hash
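A minimal sketch of the hashing scheme shown above: sha256 over a sorted-key JSON of the normalized request. The exact field set here is an approximation of the `normalized` dict built in `api_recorder.py`, and the test id is illustrative.

```python
import hashlib
import json

normalized = {
    "method": "POST",
    "endpoint": "/v1/chat/completions",
    "body": {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "hi"}]},
    "test_id": "tests/integration/agents/test_agents.py::test_custom_tool",  # illustrative
}
request_hash = hashlib.sha256(json.dumps(normalized, sort_keys=True).encode()).hexdigest()
print(f"{request_hash}.json")  # name of the recording file looked up on replay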
|
||||
|
||||
|
||||
def normalize_tool_request(provider_name: str, tool_name: str, kwargs: dict[str, Any]) -> str:
|
||||
|
|
@ -212,6 +223,11 @@ def patch_httpx_for_test_id():
|
|||
provider_data["__test_id"] = test_id
|
||||
request.headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data)
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Injected test ID into request header:")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" URL: {request.url}")
|
||||
|
||||
return None
|
||||
|
||||
LlamaStackClient._prepare_request = patched_prepare_request
|
||||
|
|
@ -355,12 +371,35 @@ class ResponseStorage:
|
|||
test_file = test_id.split("::")[0] # Remove test function part
|
||||
test_dir = Path(test_file).parent # Get parent directory
|
||||
|
||||
# Put recordings in a "recordings" subdirectory of the test's parent dir
|
||||
# e.g., "tests/integration/inference" -> "tests/integration/inference/recordings"
|
||||
return test_dir / "recordings"
|
||||
if self.base_dir.is_absolute():
|
||||
repo_root = self.base_dir.parent.parent.parent
|
||||
result = repo_root / test_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (absolute base_dir):")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Repo root: {repo_root}")
|
||||
logger.info(f" Test file: {test_file}")
|
||||
logger.info(f" Test dir: {test_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
|
||||
else:
|
||||
result = test_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (relative base_dir):")
|
||||
logger.info(f" Test ID: {test_id}")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Test dir: {test_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
|
||||
else:
|
||||
# Fallback for non-test contexts
|
||||
return self.base_dir / "recordings"
|
||||
result = self.base_dir / "recordings"
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Path resolution (no test context):")
|
||||
logger.info(f" Base dir: {self.base_dir}")
|
||||
logger.info(f" Recordings dir: {result}")
|
||||
return result
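A simplified sketch (assumed; it omits the repo-root rebasing of absolute base directories shown above) of the lookup order: the test file's own recordings directory first, then the shared fallback under the base directory.

```python
from pathlib import Path


def find_recording(base_dir: Path, test_id: str | None, request_hash: str) -> Path | None:
    candidates = []
    if test_id:
        test_dir = Path(test_id.split("::")[0]).parent  # e.g. tests/integration/agents
        candidates.append(test_dir / "recordings" / f"{request_hash}.json")
    candidates.append(base_dir / "recordings" / f"{request_hash}.json")  # session-level fallback
    for path in candidates:
        if path.exists():
            return path
    return None
```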
|
||||
|
||||
def _ensure_directory(self):
|
||||
"""Ensure test-specific directories exist."""
|
||||
|
|
@ -395,6 +434,13 @@ class ResponseStorage:
|
|||
|
||||
response_path = responses_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Storing recording:")
|
||||
logger.info(f" Request hash: {request_hash}")
|
||||
logger.info(f" File: {response_path}")
|
||||
logger.info(f" Test ID: {get_test_context()}")
|
||||
logger.info(f" Endpoint: {endpoint}")
|
||||
|
||||
# Save response to JSON file with metadata
|
||||
with open(response_path, "w") as f:
|
||||
json.dump(
|
||||
|
|
@ -423,16 +469,33 @@ class ResponseStorage:
|
|||
test_dir = self._get_test_dir()
|
||||
response_path = test_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Looking up recording:")
|
||||
logger.info(f" Request hash: {request_hash}")
|
||||
logger.info(f" Primary path: {response_path}")
|
||||
logger.info(f" Primary exists: {response_path.exists()}")
|
||||
|
||||
if response_path.exists():
|
||||
if is_debug_mode():
|
||||
logger.info(" Found in primary location")
|
||||
return _recording_from_file(response_path)
|
||||
|
||||
# Fallback to base recordings directory (for session-level recordings)
|
||||
fallback_dir = self.base_dir / "recordings"
|
||||
fallback_path = fallback_dir / response_file
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info(f" Fallback path: {fallback_path}")
|
||||
logger.info(f" Fallback exists: {fallback_path.exists()}")
|
||||
|
||||
if fallback_path.exists():
|
||||
if is_debug_mode():
|
||||
logger.info(" Found in fallback location")
|
||||
return _recording_from_file(fallback_path)
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info(" Recording not found in either location")
|
||||
|
||||
return None
|
||||
|
||||
def _model_list_responses(self, request_hash: str) -> list[dict[str, Any]]:
|
||||
|
|
@ -588,6 +651,13 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
mode = _current_mode
|
||||
storage = _current_storage
|
||||
|
||||
if is_debug_mode():
|
||||
logger.info("[RECORDING DEBUG] Entering inference method:")
|
||||
logger.info(f" Mode: {mode}")
|
||||
logger.info(f" Client type: {client_type}")
|
||||
logger.info(f" Endpoint: {endpoint}")
|
||||
logger.info(f" Test context: {get_test_context()}")
|
||||
|
||||
if mode == APIRecordingMode.LIVE or storage is None:
|
||||
if endpoint == "/v1/models":
|
||||
return original_method(self, *args, **kwargs)
|
||||
|
|
@ -643,6 +713,18 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
|
|||
return response_body
|
||||
elif mode == APIRecordingMode.REPLAY:
|
||||
# REPLAY mode requires recording to exist
|
||||
if is_debug_mode():
|
||||
logger.error("[RECORDING DEBUG] Recording not found!")
|
||||
logger.error(f" Mode: {mode}")
|
||||
logger.error(f" Request hash: {request_hash}")
|
||||
logger.error(f" Method: {method}")
|
||||
logger.error(f" URL: {url}")
|
||||
logger.error(f" Endpoint: {endpoint}")
|
||||
logger.error(f" Model: {body.get('model', 'unknown')}")
|
||||
logger.error(f" Test context: {get_test_context()}")
|
||||
logger.error(
|
||||
f" Stack config type: {os.environ.get('LLAMA_STACK_TEST_STACK_CONFIG_TYPE', 'library_client')}"
|
||||
)
|
||||
raise RuntimeError(
|
||||
f"Recording not found for request hash: {request_hash}\n"
|
||||
f"Model: {body.get('model', 'unknown')} | Request: {method} {url}\n"
|
||||
|
|
|
|||
2649  llama_stack/ui/package-lock.json  generated
File diff suppressed because it is too large
|
|
@ -43,16 +43,16 @@
|
|||
"@testing-library/dom": "^10.4.1",
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^16.3.0",
|
||||
"@types/jest": "^29.5.14",
|
||||
"@types/jest": "^30.0.0",
|
||||
"@types/node": "^24",
|
||||
"@types/react": "^19",
|
||||
"@types/react-dom": "^19",
|
||||
"eslint": "^9",
|
||||
"eslint-config-next": "15.5.2",
|
||||
"eslint-config-next": "15.5.6",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
"eslint-plugin-prettier": "^5.5.4",
|
||||
"jest": "^29.7.0",
|
||||
"jest-environment-jsdom": "^30.1.2",
|
||||
"jest": "^30.2.0",
|
||||
"jest-environment-jsdom": "^30.2.0",
|
||||
"prettier": "3.6.2",
|
||||
"tailwindcss": "^4",
|
||||
"ts-node": "^10.9.2",
|
||||
|
|
|
|||
370  scripts/diagnose_recordings.py  Executable file
|
|
@ -0,0 +1,370 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""
|
||||
Diagnostic tool for debugging test recording issues.
|
||||
|
||||
Usage:
|
||||
# Find where a hash would be looked up
|
||||
./scripts/diagnose_recordings.py find-hash 7526c930eab04ce337496a26cd15f2591d7943035f2527182861643da9b837a7
|
||||
|
||||
# Show what's in a recording file
|
||||
./scripts/diagnose_recordings.py show tests/integration/agents/recordings/7526c930....json
|
||||
|
||||
# List all recordings for a test
|
||||
./scripts/diagnose_recordings.py list-test "tests/integration/agents/test_agents.py::test_custom_tool"
|
||||
|
||||
# Explain lookup paths for a test
|
||||
./scripts/diagnose_recordings.py explain-paths --test-id "tests/integration/agents/test_agents.py::test_foo"
|
||||
|
||||
# Compare request hash computation
|
||||
./scripts/diagnose_recordings.py compute-hash --endpoint /v1/chat/completions --method POST --body '{"model":"llama3.2:3b"}' --test-id "..."
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path to import from llama_stack
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
try:
|
||||
from llama_stack.testing.api_recorder import normalize_inference_request
|
||||
except ImportError:
|
||||
normalize_inference_request = None
|
||||
|
||||
|
||||
def find_hash(hash_value: str, base_dir: Path | None = None, test_id: str | None = None):
|
||||
"""Find where a hash would be looked up and what exists"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
print(f"Searching for hash: {hash_value}\n")
|
||||
print(f"Base dir: {base_dir} (absolute={base_dir.is_absolute()})")
|
||||
|
||||
# Compute test directory
|
||||
if test_id:
|
||||
test_file = test_id.split("::")[0]
|
||||
test_dir = Path(test_file).parent
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
print(f"Test ID: {test_id}")
|
||||
print(f"Test dir: {test_recordings_dir}\n")
|
||||
else:
|
||||
test_recordings_dir = base_dir / "recordings"
|
||||
print("No test ID provided, using base dir\n")
|
||||
|
||||
# Check primary location
|
||||
response_file = f"{hash_value}.json"
|
||||
response_path = test_recordings_dir / response_file
|
||||
|
||||
print("Checking primary location:")
|
||||
print(f" {response_path}")
|
||||
if response_path.exists():
|
||||
print(" EXISTS")
|
||||
print("\nFound! Contents:")
|
||||
show_recording(response_path)
|
||||
return True
|
||||
else:
|
||||
print(" Does not exist")
|
||||
|
||||
# Check fallback location
|
||||
fallback_dir = base_dir / "recordings"
|
||||
fallback_path = fallback_dir / response_file
|
||||
|
||||
print("\nChecking fallback location:")
|
||||
print(f" {fallback_path}")
|
||||
if fallback_path.exists():
|
||||
print(" EXISTS")
|
||||
print("\nFound in fallback! Contents:")
|
||||
show_recording(fallback_path)
|
||||
return True
|
||||
else:
|
||||
print(" Does not exist")
|
||||
|
||||
# Show what files DO exist
|
||||
print(f"\nFiles in test directory ({test_recordings_dir}):")
|
||||
if test_recordings_dir.exists():
|
||||
json_files = list(test_recordings_dir.glob("*.json"))
|
||||
if json_files:
|
||||
for f in json_files[:20]:
|
||||
print(f" - {f.name}")
|
||||
if len(json_files) > 20:
|
||||
print(f" ... and {len(json_files) - 20} more")
|
||||
else:
|
||||
print(" (empty)")
|
||||
else:
|
||||
print(" Directory does not exist")
|
||||
|
||||
print(f"\nFiles in fallback directory ({fallback_dir}):")
|
||||
if fallback_dir.exists():
|
||||
json_files = list(fallback_dir.glob("*.json"))
|
||||
if json_files:
|
||||
for f in json_files[:20]:
|
||||
print(f" - {f.name}")
|
||||
if len(json_files) > 20:
|
||||
print(f" ... and {len(json_files) - 20} more")
|
||||
else:
|
||||
print(" (empty)")
|
||||
else:
|
||||
print(" Directory does not exist")
|
||||
|
||||
# Try partial hash match
|
||||
print("\nLooking for partial matches (first 16 chars)...")
|
||||
partial = hash_value[:16]
|
||||
matches = []
|
||||
|
||||
for dir_to_search in [test_recordings_dir, fallback_dir]:
|
||||
if dir_to_search.exists():
|
||||
for f in dir_to_search.glob("*.json"):
|
||||
if f.stem.startswith(partial):
|
||||
matches.append(f)
|
||||
|
||||
if matches:
|
||||
print(f"Found {len(matches)} partial match(es):")
|
||||
for m in matches:
|
||||
print(f" {m}")
|
||||
else:
|
||||
print("No partial matches found")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def show_recording(file_path: Path):
|
||||
"""Show contents of a recording file"""
|
||||
if not file_path.exists():
|
||||
print(f"File does not exist: {file_path}")
|
||||
return
|
||||
|
||||
with open(file_path) as f:
|
||||
data = json.load(f)
|
||||
|
||||
print(f"\nRecording: {file_path.name}\n")
|
||||
print(f"Test ID: {data.get('test_id', 'N/A')}")
|
||||
print("\nRequest:")
|
||||
req = data.get("request", {})
|
||||
print(f" Method: {req.get('method', 'N/A')}")
|
||||
print(f" URL: {req.get('url', 'N/A')}")
|
||||
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
|
||||
print(f" Model: {req.get('model', 'N/A')}")
|
||||
|
||||
body = req.get("body", {})
|
||||
if body:
|
||||
print("\nRequest Body:")
|
||||
print(f" Model: {body.get('model', 'N/A')}")
|
||||
print(f" Stream: {body.get('stream', 'N/A')}")
|
||||
if "messages" in body:
|
||||
print(f" Messages: {len(body['messages'])} message(s)")
|
||||
for i, msg in enumerate(body["messages"][:3]):
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, str):
|
||||
preview = content[:80] + "..." if len(content) > 80 else content
|
||||
else:
|
||||
preview = f"[{type(content).__name__}]"
|
||||
print(f" [{i}] {role}: {preview}")
|
||||
if "tools" in body:
|
||||
print(f" Tools: {len(body['tools'])} tool(s)")
|
||||
|
||||
response = data.get("response", {})
|
||||
if response:
|
||||
print("\nResponse:")
|
||||
print(f" Is streaming: {response.get('is_streaming', False)}")
|
||||
response_body = response.get("body", {})
|
||||
if isinstance(response_body, dict):
|
||||
if "__type__" in response_body:
|
||||
print(f" Type: {response_body['__type__']}")
|
||||
if "__data__" in response_body:
|
||||
response_data = response_body["__data__"]
|
||||
if "choices" in response_data:
|
||||
print(f" Choices: {len(response_data['choices'])}")
|
||||
if "usage" in response_data:
|
||||
usage = response_data["usage"]
|
||||
print(f" Usage: in={usage.get('input_tokens')}, out={usage.get('output_tokens')}")
|
||||
|
||||
|
||||
def list_test_recordings(test_id: str, base_dir: Path | None = None):
|
||||
"""List all recordings for a specific test"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
test_file = test_id.split("::")[0]
|
||||
test_dir = Path(test_file).parent
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
|
||||
print(f"Recordings for test: {test_id}\n")
|
||||
print(f"Directory: {test_recordings_dir}\n")
|
||||
|
||||
if not test_recordings_dir.exists():
|
||||
print("Directory does not exist")
|
||||
return
|
||||
|
||||
# Find all recordings for this specific test
|
||||
recordings = []
|
||||
for f in test_recordings_dir.glob("*.json"):
|
||||
try:
|
||||
with open(f) as fp:
|
||||
data = json.load(fp)
|
||||
if data.get("test_id") == test_id:
|
||||
recordings.append((f, data))
|
||||
except Exception as e:
|
||||
print(f"Could not read {f.name}: {e}")
|
||||
|
||||
if not recordings:
|
||||
print("No recordings found for this exact test ID")
|
||||
print("\nAll files in directory:")
|
||||
for f in test_recordings_dir.glob("*.json"):
|
||||
print(f" - {f.name}")
|
||||
return
|
||||
|
||||
print(f"Found {len(recordings)} recording(s):\n")
|
||||
for f, data in recordings:
|
||||
req = data.get("request", {})
|
||||
print(f" {f.name}")
|
||||
print(f" Endpoint: {req.get('endpoint', 'N/A')}")
|
||||
print(f" Model: {req.get('model', 'N/A')}")
|
||||
print("")
|
||||
|
||||
|
||||
def explain_paths(test_id: str | None = None, base_dir: Path | None = None):
|
||||
"""Explain where recordings would be searched"""
|
||||
if base_dir is None:
|
||||
base_dir = REPO_ROOT / "tests/integration/common"
|
||||
|
||||
print("Recording Lookup Path Explanation\n")
|
||||
print(f"Base directory: {base_dir}")
|
||||
print(f" Absolute: {base_dir.is_absolute()}")
|
||||
print("")
|
||||
|
||||
if test_id:
|
||||
print(f"Test ID: {test_id}")
|
||||
test_file = test_id.split("::")[0]
|
||||
print(f" Test file: {test_file}")
|
||||
|
||||
test_dir = Path(test_file).parent
|
||||
print(f" Test dir (relative): {test_dir}")
|
||||
|
||||
if base_dir.is_absolute():
|
||||
repo_root = base_dir.parent.parent.parent
|
||||
print(f" Repo root: {repo_root}")
|
||||
test_recordings_dir = repo_root / test_dir / "recordings"
|
||||
print(f" Test recordings dir (absolute): {test_recordings_dir}")
|
||||
else:
|
||||
test_recordings_dir = test_dir / "recordings"
|
||||
print(f" Test recordings dir (relative): {test_recordings_dir}")
|
||||
|
||||
print("\nLookup order for recordings:")
|
||||
print(f" 1. Test-specific: {test_recordings_dir}/<hash>.json")
|
||||
print(f" 2. Fallback: {base_dir}/recordings/<hash>.json")
|
||||
|
||||
else:
|
||||
print("No test ID provided")
|
||||
print("\nLookup location:")
|
||||
print(f" {base_dir}/recordings/<hash>.json")
|
||||
|
||||
|
||||
def compute_hash(endpoint: str, method: str, body_json: str, test_id: str | None = None):
|
||||
"""Compute hash for a request"""
|
||||
if normalize_inference_request is None:
|
||||
print("Could not import normalize_inference_request from llama_stack.testing.api_recorder")
|
||||
print("Make sure you're running from the repo root with proper PYTHONPATH")
|
||||
return
|
||||
|
||||
try:
|
||||
body = json.loads(body_json)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Invalid JSON in body: {e}")
|
||||
return
|
||||
|
||||
# Create a fake URL with the endpoint
|
||||
url = f"http://example.com{endpoint}"
|
||||
|
||||
# Set test context if provided
|
||||
if test_id:
|
||||
from llama_stack.core.testing_context import set_test_context
|
||||
|
||||
set_test_context(test_id)
|
||||
|
||||
hash_result = normalize_inference_request(method, url, {}, body)
|
||||
|
||||
print("Hash Computation\n")
|
||||
print(f"Method: {method}")
|
||||
print(f"Endpoint: {endpoint}")
|
||||
print(f"Test ID: {test_id or 'None (excluded from hash for model-list endpoints)'}")
|
||||
print("\nBody:")
|
||||
print(json.dumps(body, indent=2))
|
||||
print(f"\nComputed Hash: {hash_result}")
|
||||
print(f"\nLooking for file: {hash_result}.json")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Diagnostic tool for test recording issues",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog=__doc__,
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(dest="command", help="Command to run")
|
||||
|
||||
# find-hash command
|
||||
find_parser = subparsers.add_parser("find-hash", help="Find where a hash would be looked up")
|
||||
find_parser.add_argument("hash", help="Hash value to search for (full or partial)")
|
||||
find_parser.add_argument("--test-id", help="Test ID to determine search paths")
|
||||
find_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# show command
|
||||
show_parser = subparsers.add_parser("show", help="Show contents of a recording file")
|
||||
show_parser.add_argument("file", type=Path, help="Path to recording JSON file")
|
||||
|
||||
# list-test command
|
||||
list_parser = subparsers.add_parser("list-test", help="List all recordings for a test")
|
||||
list_parser.add_argument("test_id", help="Full test ID (e.g., tests/integration/agents/test_agents.py::test_foo)")
|
||||
list_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# explain-paths command
|
||||
explain_parser = subparsers.add_parser("explain-paths", help="Explain where recordings are searched")
|
||||
explain_parser.add_argument("--test-id", help="Test ID to show paths for")
|
||||
explain_parser.add_argument("--base-dir", type=Path, help="Base directory (default: tests/integration/common)")
|
||||
|
||||
# compute-hash command
|
||||
hash_parser = subparsers.add_parser("compute-hash", help="Compute hash for a request")
|
||||
hash_parser.add_argument("--endpoint", required=True, help="Endpoint path (e.g., /v1/chat/completions)")
|
||||
hash_parser.add_argument("--method", default="POST", help="HTTP method (default: POST)")
|
||||
hash_parser.add_argument("--body", required=True, help="Request body as JSON string")
|
||||
hash_parser.add_argument("--test-id", help="Test ID (affects hash for non-model-list endpoints)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
return
|
||||
|
||||
if args.command == "find-hash":
|
||||
find_hash(args.hash, args.base_dir, args.test_id)
|
||||
elif args.command == "show":
|
||||
show_recording(args.file)
|
||||
elif args.command == "list-test":
|
||||
list_test_recordings(args.test_id, args.base_dir)
|
||||
elif args.command == "explain-paths":
|
||||
explain_paths(args.test_id, args.base_dir)
|
||||
elif args.command == "compute-hash":
|
||||
compute_hash(args.endpoint, args.method, args.body, args.test_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
358  scripts/docker.sh  Executable file
|
|
@ -0,0 +1,358 @@
|
|||
#!/bin/bash
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Docker container management script for Llama Stack
|
||||
# Allows starting/stopping/restarting a Llama Stack docker container for testing
|
||||
|
||||
# Default values
|
||||
DISTRO=""
|
||||
PORT=8321
|
||||
INFERENCE_MODE="replay"
|
||||
COMMAND=""
|
||||
USE_COPY_NOT_MOUNT=false
|
||||
NO_REBUILD=false
|
||||
|
||||
# Function to display usage
|
||||
usage() {
|
||||
cat <<EOF
|
||||
Usage: $0 COMMAND [OPTIONS]
|
||||
|
||||
Commands:
|
||||
start Build and start the docker container
|
||||
stop Stop and remove the docker container
|
||||
restart Restart the docker container
|
||||
status Check if the container is running
|
||||
logs Show container logs (add -f to follow)
|
||||
|
||||
Options:
|
||||
--distro STRING Distribution name (e.g., 'ci-tests', 'starter') (required for start/restart)
|
||||
--port NUMBER Port to run on (default: 8321)
|
||||
--inference-mode STRING Inference mode: replay, record-if-missing or record (default: replay)
|
||||
--copy-source Copy source into image instead of mounting (default: auto-detect CI, otherwise mount)
|
||||
--no-rebuild Skip building the image, just start the container (default: false)
|
||||
--help Show this help message
|
||||
|
||||
Examples:
|
||||
# Start a docker container (local dev mode - mounts source, builds image)
|
||||
$0 start --distro ci-tests
|
||||
|
||||
# Start without rebuilding (uses existing image)
|
||||
$0 start --distro ci-tests --no-rebuild
|
||||
|
||||
# Start with source copied into image (like CI)
|
||||
$0 start --distro ci-tests --copy-source
|
||||
|
||||
# Start with custom port
|
||||
$0 start --distro starter --port 8080
|
||||
|
||||
# Check status
|
||||
$0 status --distro ci-tests
|
||||
|
||||
# View logs
|
||||
$0 logs --distro ci-tests
|
||||
|
||||
# Stop container
|
||||
$0 stop --distro ci-tests
|
||||
|
||||
# Restart container
|
||||
$0 restart --distro ci-tests
|
||||
|
||||
Note: In CI environments (detected via CI or GITHUB_ACTIONS env vars), source is
|
||||
automatically copied into the image. Locally, source is mounted for live development
|
||||
unless --copy-source is specified.
|
||||
EOF
|
||||
}
|
||||
|
||||
# Parse command (first positional arg)
|
||||
if [[ $# -eq 0 ]]; then
|
||||
echo "Error: Command required"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
COMMAND="$1"
|
||||
shift
|
||||
|
||||
# Validate command
|
||||
case "$COMMAND" in
|
||||
start | stop | restart | status | logs) ;;
|
||||
--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Error: Unknown command: $COMMAND"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# Parse options
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case $1 in
|
||||
--distro)
|
||||
DISTRO="$2"
|
||||
shift 2
|
||||
;;
|
||||
--port)
|
||||
PORT="$2"
|
||||
shift 2
|
||||
;;
|
||||
--inference-mode)
|
||||
INFERENCE_MODE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--copy-source)
|
||||
USE_COPY_NOT_MOUNT=true
|
||||
shift
|
||||
;;
|
||||
--no-rebuild)
|
||||
NO_REBUILD=true
|
||||
shift
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1"
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Validate required parameters for commands that need them
|
||||
if [[ "$COMMAND" != "stop" && "$COMMAND" != "status" && "$COMMAND" != "logs" ]]; then
|
||||
if [[ -z "$DISTRO" ]]; then
|
||||
echo "Error: --distro is required for '$COMMAND' command"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# If distro not provided for stop/status/logs, try to infer from running containers
|
||||
if [[ -z "$DISTRO" && ("$COMMAND" == "stop" || "$COMMAND" == "status" || "$COMMAND" == "logs") ]]; then
|
||||
# Look for any llama-stack-test-* container
|
||||
RUNNING_CONTAINERS=$(docker ps -a --filter "name=llama-stack-test-" --format "{{.Names}}" | head -1)
|
||||
if [[ -n "$RUNNING_CONTAINERS" ]]; then
|
||||
DISTRO=$(echo "$RUNNING_CONTAINERS" | sed 's/llama-stack-test-//')
|
||||
echo "Found running container for distro: $DISTRO"
|
||||
else
|
||||
echo "Error: --distro is required (no running containers found)"
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Remove docker: prefix if present
|
||||
DISTRO=$(echo "$DISTRO" | sed 's/^docker://')
|
||||
|
||||
CONTAINER_NAME="llama-stack-test-$DISTRO"
|
||||
|
||||
# Function to check if container is running
|
||||
is_container_running() {
|
||||
docker ps --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||
}
|
||||
|
||||
# Function to check if container exists (running or stopped)
|
||||
container_exists() {
|
||||
docker ps -a --filter "name=^${CONTAINER_NAME}$" --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"
|
||||
}
|
||||
|
||||
# Function to stop and remove container
|
||||
stop_container() {
|
||||
if container_exists; then
|
||||
echo "Stopping container: $CONTAINER_NAME"
|
||||
docker stop "$CONTAINER_NAME" 2>/dev/null || true
|
||||
echo "Removing container: $CONTAINER_NAME"
|
||||
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||
echo "✅ Container stopped and removed"
|
||||
else
|
||||
echo "⚠️ Container $CONTAINER_NAME does not exist"
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to build docker image
|
||||
build_image() {
|
||||
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||
# Get the repo root (parent of scripts directory)
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||
|
||||
# Determine whether to copy or mount source
|
||||
# Copy in CI or if explicitly requested, otherwise mount for live development
|
||||
BUILD_ENV="LLAMA_STACK_DIR=$REPO_ROOT"
|
||||
if [[ "$USE_COPY_NOT_MOUNT" == "true" ]] || [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||
echo "Copying source into image (USE_COPY_NOT_MOUNT=true, CI=${CI:-false}, GITHUB_ACTIONS=${GITHUB_ACTIONS:-false})"
|
||||
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||
else
|
||||
echo "Will mount source for live development"
|
||||
fi
|
||||
|
||||
if ! eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Docker image built successfully"
|
||||
}
|
||||
|
||||
# Function to start container
|
||||
start_container() {
|
||||
# Check if already running
|
||||
if is_container_running; then
|
||||
echo "⚠️ Container $CONTAINER_NAME is already running"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Stop and remove if exists but not running
|
||||
if container_exists; then
|
||||
echo "Removing existing stopped container..."
|
||||
docker rm "$CONTAINER_NAME" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Build the image unless --no-rebuild was specified
|
||||
if [[ "$NO_REBUILD" == "true" ]]; then
|
||||
echo "Skipping build (--no-rebuild specified)"
|
||||
# Check if image exists (with or without localhost/ prefix)
|
||||
if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "distribution-$DISTRO:dev$"; then
|
||||
echo "❌ Error: Image distribution-$DISTRO:dev does not exist"
|
||||
echo "Either build it first without --no-rebuild, or run: llama stack build --distro $DISTRO --image-type container"
|
||||
exit 1
|
||||
fi
|
||||
echo "✅ Found existing image for distribution-$DISTRO:dev"
|
||||
else
|
||||
build_image
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting Docker Container ==="
|
||||
|
||||
# Get the repo root for volume mount
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
|
||||
|
||||
# Determine the actual image name (may have localhost/ prefix)
|
||||
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||
if [[ -z "$IMAGE_NAME" ]]; then
|
||||
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||
exit 1
|
||||
fi
|
||||
echo "Using image: $IMAGE_NAME"
|
||||
|
||||
# Build environment variables for docker run
|
||||
DOCKER_ENV_VARS=""
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||
|
||||
# Set default OLLAMA_URL if not provided
|
||||
# On macOS/Windows, use host.docker.internal to reach host from container
|
||||
# On Linux with --network host, use localhost
|
||||
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
|
||||
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
|
||||
else
|
||||
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
|
||||
fi
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||
|
||||
# Pass through API keys if they exist
|
||||
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||
[ -n "${SQLITE_STORE_DIR:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SQLITE_STORE_DIR=$SQLITE_STORE_DIR"
|
||||
|
||||
# Use --network host on Linux only (macOS doesn't support it properly)
|
||||
NETWORK_MODE=""
|
||||
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
|
||||
NETWORK_MODE="--network host"
|
||||
fi
|
||||
|
||||
docker run -d $NETWORK_MODE --name "$CONTAINER_NAME" \
|
||||
-p $PORT:$PORT \
|
||||
$DOCKER_ENV_VARS \
|
||||
-v "$REPO_ROOT":/app/llama-stack-source \
|
||||
"$IMAGE_NAME" \
|
||||
--port $PORT
|
||||
|
||||
echo "Waiting for container to start..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:$PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||
echo "✅ Container started successfully"
|
||||
echo ""
|
||||
echo "=== Container Information ==="
|
||||
echo "Container name: $CONTAINER_NAME"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
echo "Health check: http://localhost:$PORT/v1/health"
|
||||
echo ""
|
||||
echo "To view logs: $0 logs --distro $DISTRO"
|
||||
echo "To stop: $0 stop --distro $DISTRO"
|
||||
return 0
|
||||
fi
|
||||
if [[ $i -eq 30 ]]; then
|
||||
echo "❌ Container failed to start within timeout"
|
||||
echo "Showing container logs:"
|
||||
docker logs "$CONTAINER_NAME"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
# Execute command
|
||||
case "$COMMAND" in
|
||||
start)
|
||||
start_container
|
||||
;;
|
||||
stop)
|
||||
stop_container
|
||||
;;
|
||||
restart)
|
||||
echo "Restarting container: $CONTAINER_NAME"
|
||||
stop_container
|
||||
echo ""
|
||||
start_container
|
||||
;;
|
||||
status)
|
||||
if is_container_running; then
|
||||
echo "✅ Container $CONTAINER_NAME is running"
|
||||
echo "URL: http://localhost:$PORT"
|
||||
# Try to get the actual port from the container
|
||||
ACTUAL_PORT=$(docker port "$CONTAINER_NAME" 2>/dev/null | grep "8321/tcp" | cut -d':' -f2 | head -1)
|
||||
if [[ -n "$ACTUAL_PORT" ]]; then
|
||||
echo "Port: $ACTUAL_PORT"
|
||||
fi
|
||||
elif container_exists; then
|
||||
echo "⚠️ Container $CONTAINER_NAME exists but is not running"
|
||||
echo "Start it with: $0 start --distro $DISTRO"
|
||||
else
|
||||
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||
echo "Start it with: $0 start --distro $DISTRO"
|
||||
fi
|
||||
;;
|
||||
logs)
|
||||
if container_exists; then
|
||||
echo "=== Logs for $CONTAINER_NAME ==="
|
||||
# Check if -f flag was passed after 'logs' command
|
||||
if [[ "${1:-}" == "-f" || "${1:-}" == "--follow" ]]; then
|
||||
docker logs --tail 100 --follow "$CONTAINER_NAME"
|
||||
else
|
||||
docker logs --tail 100 "$CONTAINER_NAME"
|
||||
fi
|
||||
else
|
||||
echo "❌ Container $CONTAINER_NAME does not exist"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
|
@ -5,10 +5,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
[ -z "$BASH_VERSION" ] && {
|
||||
echo "This script must be run with bash" >&2
|
||||
exit 1
|
||||
}
|
||||
[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
|
||||
if set -o | grep -Eq 'posix[[:space:]]+on'; then
|
||||
exec /usr/bin/env bash "$0" "$@"
|
||||
fi
|
||||
|
||||
set -Eeuo pipefail
|
||||
|
||||
|
|
@ -18,12 +18,110 @@ MODEL_ALIAS="llama3.2:3b"
|
|||
SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
|
||||
WAIT_TIMEOUT=30
|
||||
TEMP_LOG=""
|
||||
WITH_TELEMETRY=true
|
||||
TELEMETRY_SERVICE_NAME="llama-stack"
|
||||
TELEMETRY_SINKS="otel_trace,otel_metric"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT="http://otel-collector:4318"
|
||||
TEMP_TELEMETRY_DIR=""
|
||||
|
||||
materialize_telemetry_configs() {
|
||||
local dest="$1"
|
||||
mkdir -p "$dest"
|
||||
local otel_cfg="${dest}/otel-collector-config.yaml"
|
||||
local prom_cfg="${dest}/prometheus.yml"
|
||||
local graf_cfg="${dest}/grafana-datasources.yaml"
|
||||
|
||||
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
|
||||
if [ -e "$asset" ]; then
|
||||
die "Telemetry asset ${asset} already exists; refusing to overwrite"
|
||||
fi
|
||||
done
|
||||
|
||||
cat <<'EOF' > "$otel_cfg"
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 1024
|
||||
|
||||
exporters:
|
||||
# Export traces to Jaeger
|
||||
otlp/jaeger:
|
||||
endpoint: jaeger:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
# Export metrics to Prometheus
|
||||
prometheus:
|
||||
endpoint: 0.0.0.0:9464
|
||||
namespace: llama_stack
|
||||
|
||||
# Debug exporter for troubleshooting
|
||||
debug:
|
||||
verbosity: detailed
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [otlp/jaeger, debug]
|
||||
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [prometheus, debug]
|
||||
EOF
|
||||
|
||||
cat <<'EOF' > "$prom_cfg"
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:9464']
|
||||
EOF
|
||||
|
||||
cat <<'EOF' > "$graf_cfg"
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
||||
|
||||
- name: Jaeger
|
||||
type: jaeger
|
||||
access: proxy
|
||||
url: http://jaeger:16686
|
||||
editable: true
|
||||
EOF
|
||||
}
|
||||
|
||||
# Cleanup function to remove temporary files
|
||||
cleanup() {
|
||||
if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
|
||||
rm -f "$TEMP_LOG"
|
||||
fi
|
||||
if [ -n "$TEMP_TELEMETRY_DIR" ] && [ -d "$TEMP_TELEMETRY_DIR" ]; then
|
||||
rm -rf "$TEMP_TELEMETRY_DIR"
|
||||
fi
|
||||
}
|
||||
|
||||
# Set up trap to clean up on exit, error, or interrupt
|
||||
|
|
@ -32,7 +130,7 @@ trap cleanup EXIT ERR INT TERM
|
|||
log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
|
||||
die(){
|
||||
printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2
|
||||
printf "\e[1;31m🐛 Report an issue @ https://github.com/meta-llama/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
||||
printf "\e[1;31m🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug\e[0m\n" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
|
|
@ -89,6 +187,12 @@ Options:
|
|||
-m, --model MODEL Model alias to use (default: ${MODEL_ALIAS})
|
||||
-i, --image IMAGE Server image (default: ${SERVER_IMAGE})
|
||||
-t, --timeout SECONDS Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
|
||||
--with-telemetry Provision Jaeger, OTEL Collector, Prometheus, and Grafana (default: enabled)
|
||||
--no-telemetry, --without-telemetry
|
||||
Skip provisioning the telemetry stack
|
||||
--telemetry-service NAME Service name reported to telemetry (default: ${TELEMETRY_SERVICE_NAME})
|
||||
--telemetry-sinks SINKS Comma-separated telemetry sinks (default: ${TELEMETRY_SINKS})
|
||||
--otel-endpoint URL OTLP endpoint provided to Llama Stack (default: ${OTEL_EXPORTER_OTLP_ENDPOINT})
|
||||
-h, --help Show this help message
|
||||
|
||||
For more information:
|
||||
|
|
@ -127,6 +231,26 @@ while [[ $# -gt 0 ]]; do
|
|||
WAIT_TIMEOUT="$2"
|
||||
shift 2
|
||||
;;
|
||||
--with-telemetry)
|
||||
WITH_TELEMETRY=true
|
||||
shift
|
||||
;;
|
||||
--no-telemetry|--without-telemetry)
|
||||
WITH_TELEMETRY=false
|
||||
shift
|
||||
;;
|
||||
--telemetry-service)
|
||||
TELEMETRY_SERVICE_NAME="$2"
|
||||
shift 2
|
||||
;;
|
||||
--telemetry-sinks)
|
||||
TELEMETRY_SINKS="$2"
|
||||
shift 2
|
||||
;;
|
||||
--otel-endpoint)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
die "Unknown option: $1"
|
||||
;;
|
||||
|
|
@ -171,7 +295,11 @@ if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then
|
|||
fi
|
||||
|
||||
# Clean up any leftovers from earlier runs
|
||||
for name in ollama-server llama-stack; do
|
||||
containers=(ollama-server llama-stack)
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
containers+=(jaeger otel-collector prometheus grafana)
|
||||
fi
|
||||
for name in "${containers[@]}"; do
|
||||
ids=$($ENGINE ps -aq --filter "name=^${name}$")
|
||||
if [ -n "$ids" ]; then
|
||||
log "⚠️ Found existing container(s) for '${name}', removing..."
|
||||
|
|
@ -191,6 +319,64 @@ if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then
|
|||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Telemetry Stack
|
||||
###############################################################################
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
TEMP_TELEMETRY_DIR="$(mktemp -d)"
|
||||
TELEMETRY_ASSETS_DIR="$TEMP_TELEMETRY_DIR"
|
||||
log "🧰 Materializing telemetry configs..."
|
||||
materialize_telemetry_configs "$TELEMETRY_ASSETS_DIR"
|
||||
|
||||
log "📡 Starting telemetry stack..."
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name jaeger \
|
||||
--network llama-net \
|
||||
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||
-p 16686:16686 \
|
||||
-p 14250:14250 \
|
||||
-p 9411:9411 \
|
||||
docker.io/jaegertracing/all-in-one:latest > /dev/null 2>&1; then
|
||||
die "Jaeger startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name otel-collector \
|
||||
--network llama-net \
|
||||
-p 4318:4318 \
|
||||
-p 4317:4317 \
|
||||
-p 9464:9464 \
|
||||
-p 13133:13133 \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
|
||||
docker.io/otel/opentelemetry-collector-contrib:latest \
|
||||
--config /etc/otel-collector-config.yaml > /dev/null 2>&1; then
|
||||
die "OpenTelemetry Collector startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name prometheus \
|
||||
--network llama-net \
|
||||
-p 9090:9090 \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
|
||||
docker.io/prom/prometheus:latest \
|
||||
--config.file=/etc/prometheus/prometheus.yml \
|
||||
--storage.tsdb.path=/prometheus \
|
||||
--web.console.libraries=/etc/prometheus/console_libraries \
|
||||
--web.console.templates=/etc/prometheus/consoles \
|
||||
--storage.tsdb.retention.time=200h \
|
||||
--web.enable-lifecycle > /dev/null 2>&1; then
|
||||
die "Prometheus startup failed"
|
||||
fi
|
||||
|
||||
if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name grafana \
|
||||
--network llama-net \
|
||||
-p 3000:3000 \
|
||||
-e GF_SECURITY_ADMIN_PASSWORD=admin \
|
||||
-e GF_USERS_ALLOW_SIGN_UP=false \
|
||||
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
|
||||
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
|
||||
die "Grafana startup failed"
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# 1. Ollama
|
||||
###############################################################################
|
||||
|
|
@ -218,9 +404,19 @@ fi
|
|||
###############################################################################
|
||||
# 2. Llama‑Stack
|
||||
###############################################################################
|
||||
server_env_opts=()
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
server_env_opts+=(
|
||||
-e TELEMETRY_SINKS="${TELEMETRY_SINKS}"
|
||||
-e OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||
-e OTEL_SERVICE_NAME="${TELEMETRY_SERVICE_NAME}"
|
||||
)
|
||||
fi
|
||||
|
||||
cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
|
||||
--network llama-net \
|
||||
-p "${PORT}:${PORT}" \
|
||||
"${server_env_opts[@]}" \
|
||||
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
|
||||
"${SERVER_IMAGE}" --port "${PORT}")
|
||||
|
||||
|
|
@ -244,5 +440,12 @@ log "👉 API endpoint: http://localhost:${PORT}"
|
|||
log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
|
||||
log "💻 To access the llama stack CLI, exec into the container:"
|
||||
log " $ENGINE exec -ti llama-stack bash"
|
||||
if [ "$WITH_TELEMETRY" = true ]; then
|
||||
log "📡 Telemetry dashboards:"
|
||||
log " Jaeger UI: http://localhost:16686"
|
||||
log " Prometheus UI: http://localhost:9090"
|
||||
log " Grafana UI: http://localhost:3000 (admin/admin)"
|
||||
log " OTEL Collector: http://localhost:4318"
|
||||
fi
|
||||
log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
|
||||
log ""
|
||||
|
|
|
|||
|
|
@ -42,9 +42,12 @@ Setups are defined in tests/integration/setups.py and provide global configurati
|
|||
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
|
||||
|
||||
Examples:
|
||||
# Basic inference tests with ollama
|
||||
# Basic inference tests with ollama (server mode)
|
||||
$0 --stack-config server:ci-tests --suite base --setup ollama
|
||||
|
||||
# Basic inference tests with docker
|
||||
$0 --stack-config docker:ci-tests --suite base --setup ollama
|
||||
|
||||
# Multiple test directories with vllm
|
||||
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
|
||||
|
||||
|
|
@ -153,7 +156,7 @@ echo "Setting SQLITE_STORE_DIR: $SQLITE_STORE_DIR"
|
|||
|
||||
# Determine stack config type for api_recorder test isolation
|
||||
if [[ "$COLLECT_ONLY" == false ]]; then
|
||||
if [[ "$STACK_CONFIG" == server:* ]]; then
|
||||
if [[ "$STACK_CONFIG" == server:* ]] || [[ "$STACK_CONFIG" == docker:* ]]; then
|
||||
export LLAMA_STACK_TEST_STACK_CONFIG_TYPE="server"
|
||||
echo "Setting stack config type: server"
|
||||
else
|
||||
|
|
@ -229,6 +232,104 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
|
|||
trap stop_server EXIT ERR INT TERM
|
||||
fi
|
||||
|
||||
# Start Docker Container if needed
|
||||
if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
|
||||
stop_container() {
|
||||
echo "Stopping Docker container..."
|
||||
container_name="llama-stack-test-$DISTRO"
|
||||
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
|
||||
echo "Stopping and removing container: $container_name"
|
||||
docker stop "$container_name" 2>/dev/null || true
|
||||
docker rm "$container_name" 2>/dev/null || true
|
||||
else
|
||||
echo "No container named $container_name found"
|
||||
fi
|
||||
echo "Docker container stopped"
|
||||
}
|
||||
|
||||
# Extract distribution name from docker:distro format
|
||||
DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
echo "=== Building Docker Image for distribution: $DISTRO ==="
|
||||
# Set LLAMA_STACK_DIR to repo root
|
||||
# USE_COPY_NOT_MOUNT copies files into image (for CI), otherwise mounts for live development
|
||||
BUILD_ENV="LLAMA_STACK_DIR=$ROOT_DIR"
|
||||
if [[ "${CI:-false}" == "true" ]] || [[ "${GITHUB_ACTIONS:-false}" == "true" ]]; then
|
||||
echo "CI detected (CI=$CI, GITHUB_ACTIONS=$GITHUB_ACTIONS): copying source into image"
|
||||
BUILD_ENV="USE_COPY_NOT_MOUNT=true $BUILD_ENV"
|
||||
else
|
||||
echo "Local mode: will mount source for live development"
|
||||
fi
|
||||
|
||||
eval "$BUILD_ENV llama stack build --distro '$DISTRO' --image-type container"
|
||||
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "❌ Failed to build Docker image"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Starting Docker Container ==="
|
||||
container_name="llama-stack-test-$DISTRO"
|
||||
|
||||
# Stop and remove existing container if it exists
|
||||
docker stop "$container_name" 2>/dev/null || true
|
||||
docker rm "$container_name" 2>/dev/null || true
|
||||
|
||||
# Build environment variables for docker run
|
||||
DOCKER_ENV_VARS=""
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
|
||||
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
|
||||
|
||||
# Pass through API keys if they exist
|
||||
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
|
||||
[ -n "${FIREWORKS_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e FIREWORKS_API_KEY=$FIREWORKS_API_KEY"
|
||||
[ -n "${TAVILY_SEARCH_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY"
|
||||
[ -n "${OPENAI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OPENAI_API_KEY=$OPENAI_API_KEY"
|
||||
[ -n "${ANTHROPIC_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY"
|
||||
[ -n "${GROQ_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GROQ_API_KEY=$GROQ_API_KEY"
|
||||
[ -n "${GEMINI_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e GEMINI_API_KEY=$GEMINI_API_KEY"
|
||||
[ -n "${OLLAMA_URL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
|
||||
[ -n "${SAFETY_MODEL:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e SAFETY_MODEL=$SAFETY_MODEL"
|
||||
|
||||
# Determine the actual image name (may have localhost/ prefix)
|
||||
IMAGE_NAME=$(docker images --format "{{.Repository}}:{{.Tag}}" | grep "distribution-$DISTRO:dev$" | head -1)
|
||||
if [[ -z "$IMAGE_NAME" ]]; then
|
||||
echo "❌ Error: Could not find image for distribution-$DISTRO:dev"
|
||||
exit 1
|
||||
fi
|
||||
echo "Using image: $IMAGE_NAME"
|
||||
|
||||
docker run -d --network host --name "$container_name" \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
$DOCKER_ENV_VARS \
|
||||
-v $ROOT_DIR:/app/llama-stack-source \
|
||||
"$IMAGE_NAME" \
|
||||
--port $LLAMA_STACK_PORT
|
||||
|
||||
echo "Waiting for Docker container to start..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
|
||||
echo "✅ Docker container started successfully"
|
||||
break
|
||||
fi
|
||||
if [[ $i -eq 30 ]]; then
|
||||
echo "❌ Docker container failed to start"
|
||||
echo "Container logs:"
|
||||
docker logs "$container_name"
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo ""
|
||||
|
||||
# Update STACK_CONFIG to point to the running container
|
||||
STACK_CONFIG="http://localhost:$LLAMA_STACK_PORT"
|
||||
|
||||
trap stop_container EXIT ERR INT TERM
|
||||
fi
|
||||
|
||||
# Run tests
|
||||
echo "=== Running Integration Tests ==="
|
||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||
|
|
|
|||
|
|
@ -70,10 +70,15 @@ class BatchHelper:
|
|||
):
|
||||
"""Wait for a batch to reach a terminal status.
|
||||
|
||||
Uses exponential backoff polling strategy for efficient waiting:
|
||||
- Starts with short intervals (0.1s) for fast batches (e.g., replay mode)
|
||||
- Doubles interval each iteration up to a maximum
|
||||
- Adapts automatically to both fast and slow batch processing
|
||||
|
||||
Args:
|
||||
batch_id: The batch ID to monitor
|
||||
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
|
||||
sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
|
||||
sleep_interval: If provided, uses fixed interval instead of exponential backoff
|
||||
expected_statuses: Set of expected terminal statuses (default: {"completed"})
|
||||
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
|
||||
|
||||
|
|
@ -84,10 +89,6 @@ class BatchHelper:
|
|||
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
|
||||
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
|
||||
"""
|
||||
if sleep_interval is None:
|
||||
# Default to 1/10th of max_wait_time, with min 1s and max 15s
|
||||
sleep_interval = max(1, min(15, max_wait_time // 10))
|
||||
|
||||
if expected_statuses is None:
|
||||
expected_statuses = {"completed"}
|
||||
|
||||
|
|
@ -95,6 +96,15 @@ class BatchHelper:
|
|||
unexpected_statuses = terminal_statuses - expected_statuses
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# Use exponential backoff if no explicit sleep_interval provided
|
||||
if sleep_interval is None:
|
||||
current_interval = 0.1 # Start with 100ms
|
||||
max_interval = 10.0 # Cap at 10 seconds
|
||||
else:
|
||||
current_interval = sleep_interval
|
||||
max_interval = sleep_interval
|
||||
|
||||
while time.time() - start_time < max_wait_time:
|
||||
current_batch = self.client.batches.retrieve(batch_id)
|
||||
|
||||
|
|
@ -107,7 +117,11 @@ class BatchHelper:
|
|||
else:
|
||||
pytest.fail(error_msg)
|
||||
|
||||
time.sleep(sleep_interval)
|
||||
time.sleep(current_interval)
|
||||
|
||||
# Exponential backoff: double the interval each time, up to max
|
||||
if sleep_interval is None:
|
||||
current_interval = min(current_interval * 2, max_interval)
|
||||
|
||||
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
|
||||
if timeout_action == "skip":
|
||||
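The exponential-backoff polling described in the docstring above can be sketched on its own. This is a minimal illustration, not the BatchHelper implementation: retrieve_status is an assumed callable standing in for client.batches.retrieve(batch_id).status.

# Minimal sketch of exponential-backoff polling (hypothetical helper).
import time


def wait_for_terminal_status(retrieve_status, max_wait_time=60.0, sleep_interval=None):
    # Fixed interval when the caller provides one, otherwise exponential backoff.
    interval = 0.1 if sleep_interval is None else sleep_interval
    max_interval = 10.0 if sleep_interval is None else sleep_interval
    start = time.time()
    while time.time() - start < max_wait_time:
        status = retrieve_status()
        if status in {"completed", "failed", "cancelled", "expired"}:
            return status
        time.sleep(interval)
        if sleep_interval is None:
            interval = min(interval * 2, max_interval)  # 0.1s, 0.2s, 0.4s, ... capped at 10s
    raise TimeoutError(f"batch did not reach a terminal status within {max_wait_time}s")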
|
|
|
|||
|
|
@ -0,0 +1,506 @@
|
|||
{
|
||||
"test_id": null,
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is 2 + 2?"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "The answer to the equation 2 + 2 is 4."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Tell me a short joke"
|
||||
}
|
||||
],
|
||||
"max_tokens": 0,
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Why",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " did",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " scare",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "crow",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " win",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " an",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " award",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "?\n\n",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Because",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " he",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " was",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " outstanding",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " in",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " his",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " field",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-ab1a32474062",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
{
|
||||
"test_id": null,
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/models",
|
||||
"headers": {},
|
||||
"body": {},
|
||||
"endpoint": "/v1/models",
|
||||
"model": ""
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b-instruct-fp16",
|
||||
"created": 1760453641,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "qwen3:4b",
|
||||
"created": 1757615302,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "gpt-oss:latest",
|
||||
"created": 1756395223,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "nomic-embed-text:latest",
|
||||
"created": 1756318548,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:3b",
|
||||
"created": 1755191039,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "all-minilm:l6-v2",
|
||||
"created": 1753968177,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:1b",
|
||||
"created": 1746124735,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.model.Model",
|
||||
"__data__": {
|
||||
"id": "llama3.2:latest",
|
||||
"created": 1746044170,
|
||||
"object": "model",
|
||||
"owned_by": "library"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
|
|
@ -42,7 +42,9 @@ def pytest_sessionstart(session):
|
|||
|
||||
# Set test stack config type for api_recorder test isolation
|
||||
stack_config = session.config.getoption("--stack-config", default=None)
|
||||
if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
|
||||
if stack_config and (
|
||||
stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
|
||||
):
|
||||
os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
|
||||
logger.info(f"Test stack config type: server (stack_config={stack_config})")
|
||||
else:
|
||||
|
|
@ -139,7 +141,9 @@ def pytest_addoption(parser):
|
|||
a 'pointer' to the stack. this can be either be:
|
||||
(a) a template name like `starter`, or
|
||||
(b) a path to a run.yaml file, or
|
||||
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
|
||||
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
|
||||
(d) a server config like `server:ci-tests`, or
|
||||
(e) a docker config like `docker:ci-tests` (builds and runs container)
|
||||
"""
|
||||
),
|
||||
)
|
||||
|
|
|
|||
95
tests/integration/telemetry/conftest.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Telemetry test configuration using OpenTelemetry SDK exporters.
|
||||
|
||||
This conftest provides in-memory telemetry collection for library_client mode only.
|
||||
Tests using these fixtures should skip in server mode since the in-memory collector
|
||||
cannot access spans from a separate server process.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import opentelemetry.metrics as otel_metrics
|
||||
import opentelemetry.trace as otel_trace
|
||||
import pytest
|
||||
from opentelemetry import metrics, trace
|
||||
from opentelemetry.sdk.metrics import MeterProvider
|
||||
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
|
||||
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
|
||||
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
||||
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||
|
||||
import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
|
||||
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
|
||||
from tests.integration.fixtures.common import instantiate_llama_stack_client
|
||||
|
||||
|
||||
class TestCollector:
|
||||
def __init__(self, span_exp, metric_read):
|
||||
assert span_exp and metric_read
|
||||
self.span_exporter = span_exp
|
||||
self.metric_reader = metric_read
|
||||
|
||||
def get_spans(self) -> tuple[ReadableSpan, ...]:
|
||||
return self.span_exporter.get_finished_spans()
|
||||
|
||||
def get_metrics(self) -> Any | None:
|
||||
metrics = self.metric_reader.get_metrics_data()
|
||||
if metrics and metrics.resource_metrics:
|
||||
return metrics.resource_metrics[0].scope_metrics[0].metrics
|
||||
return None
|
||||
|
||||
def clear(self) -> None:
|
||||
self.span_exporter.clear()
|
||||
self.metric_reader.get_metrics_data()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def _telemetry_providers():
|
||||
"""Set up in-memory OTEL providers before llama_stack_client initializes."""
|
||||
# Reset set-once flags to allow re-initialization
|
||||
if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
|
||||
otel_trace._TRACER_PROVIDER_SET_ONCE._done = False # type: ignore
|
||||
if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
|
||||
otel_metrics._METER_PROVIDER_SET_ONCE._done = False # type: ignore
|
||||
|
||||
# Create in-memory exporters/readers
|
||||
span_exporter = InMemorySpanExporter()
|
||||
tracer_provider = TracerProvider()
|
||||
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
|
||||
trace.set_tracer_provider(tracer_provider)
|
||||
|
||||
metric_reader = InMemoryMetricReader()
|
||||
meter_provider = MeterProvider(metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(meter_provider)
|
||||
|
||||
# Set module-level provider so TelemetryAdapter uses our in-memory providers
|
||||
telemetry_module._TRACER_PROVIDER = tracer_provider
|
||||
|
||||
yield (span_exporter, metric_reader, tracer_provider, meter_provider)
|
||||
|
||||
telemetry_module._TRACER_PROVIDER = None
|
||||
tracer_provider.shutdown()
|
||||
meter_provider.shutdown()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def llama_stack_client(_telemetry_providers, request):
|
||||
"""Override llama_stack_client to ensure in-memory telemetry providers are used."""
|
||||
patch_httpx_for_test_id()
|
||||
client = instantiate_llama_stack_client(request.session)
|
||||
|
||||
return client
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_otlp_collector(_telemetry_providers):
|
||||
"""Provides access to telemetry data and clears between tests."""
|
||||
span_exporter, metric_reader, _, _ = _telemetry_providers
|
||||
collector = TestCollector(span_exporter, metric_reader)
|
||||
yield collector
|
||||
collector.clear()
|
||||
|
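For orientation, the in-memory capture that these fixtures rely on can be exercised with the OpenTelemetry SDK alone; the following self-contained sketch (no llama-stack wiring, illustrative only) shows how InMemorySpanExporter makes finished spans available for assertions.

# Standalone sketch of in-memory span capture; the conftest above wires the
# same pieces into llama-stack's telemetry provider.
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))  # synchronous export, no batching

tracer = provider.get_tracer("example")
with tracer.start_as_current_span("demo-span") as span:
    span.set_attribute("chunk_count", 3)

spans = exporter.get_finished_spans()
assert len(spans) == 1 and spans[0].attributes["chunk_count"] == 3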
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test OpenAI telemetry creation"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-0de60cd6a6ec",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n # Create a new user entity\n user_entity = {\n 'id': 'user-123',\n 'name': 'John Doe',\n 'email': 'john.doe@example.com'\n }\n \n # Send the user creation request to OpenAI\n response = api.users.create(user_entity)\n print(f\"User created: {response}\")\n\ndef create_transaction():\n # Create a new transaction entity\n transaction_entity = {\n 'id': 'tran-123',\n 'user_id': 'user-123',\n 'transaction_type': 'query'\n }\n \n # Send the transaction creation request to OpenAI\n response = api.transactions.create(transaction_entity)\n print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n # Create a new telemetry event entity\n telemetry_event_entity = {\n 'id': 'telem-123',\n 'transaction_id': 'tran-123',\n 'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n }\n \n # Send the telemetry data to OpenAI\n response = api.telemetry.create(telemetry_event_entity)\n print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 460,
|
||||
"prompt_tokens": 30,
|
||||
"total_tokens": 490,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
{
|
||||
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Test trace openai with temperature 0.7"
|
||||
}
|
||||
],
|
||||
"max_tokens": 100,
|
||||
"stream": false,
|
||||
"temperature": 0.7
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "llama3.2:3b-instruct-fp16"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "rec-1fcfd86d8111",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": null,
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "llama3.2:3b-instruct-fp16",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": "fp_ollama",
|
||||
"usage": {
|
||||
"completion_tokens": 100,
|
||||
"prompt_tokens": 35,
|
||||
"total_tokens": 135,
|
||||
"completion_tokens_details": null,
|
||||
"prompt_tokens_details": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
||||
4211
tests/integration/telemetry/recordings/d45c9a9229e7e3f50a6eac139508babe21988649eb321b562f74061f58593c25.json
generated
Normal file
File diff suppressed because it is too large
4263
tests/integration/telemetry/recordings/db8ffad4840512348c215005128557807ffbed0cf6bf11a52c1d1009878886ef.json
generated
Normal file
File diff suppressed because it is too large
|
|
@@ -0,0 +1,59 @@
{
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace openai with temperature 0.7"
}
],
"max_tokens": 100,
"stream": false,
"temperature": 0.7
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-dba5042d6691",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n   ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 35,
"total_tokens": 135,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
112 tests/integration/telemetry/test_completions.py Normal file
@@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""

import json
import os

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
    reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)


def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
    """Verify streaming adds chunk_count and __type__=async_generator."""

    stream = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai 1"}],
        stream=True,
    )

    chunks = list(stream)
    assert len(chunks) > 0

    spans = mock_otlp_collector.get_spans()
    assert len(spans) > 0

    chunk_count = None
    for span in spans:
        if span.attributes.get("__type__") == "async_generator":
            chunk_count = span.attributes.get("chunk_count")
            if chunk_count:
                chunk_count = int(chunk_count)
            break

    assert chunk_count is not None
    assert chunk_count == len(chunks)


def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
    """Comprehensive validation of telemetry data format including spans and metrics."""
    response = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
        temperature=0.7,
        max_tokens=100,
        stream=False,
    )

    # Handle both dict and Pydantic model for usage
    # This occurs due to the replay system returning a dict for usage, but the client returning a Pydantic model
    # TODO: Fix this by making the replay system return a Pydantic model for usage
    usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
    assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
    assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
    assert usage.get("total_tokens") and usage["total_tokens"] > 0

    # Verify spans
    spans = mock_otlp_collector.get_spans()
    assert len(spans) == 5

    # we only need this captured one time
    logged_model_id = None

    for span in spans:
        attrs = span.attributes
        assert attrs is not None

        # Root span is created manually by tracing middleware, not by @trace_protocol decorator
        is_root_span = attrs.get("__root__") is True

        if is_root_span:
            # Root spans have different attributes
            assert attrs.get("__location__") in ["library_client", "server"]
        else:
            # Non-root spans are created by @trace_protocol decorator
            assert attrs.get("__autotraced__")
            assert attrs.get("__class__") and attrs.get("__method__")
            assert attrs.get("__type__") in ["async", "sync", "async_generator"]

            args = json.loads(attrs["__args__"])
            if "model_id" in args:
                logged_model_id = args["model_id"]

    assert logged_model_id is not None
    assert logged_model_id == text_model_id

    # TODO: re-enable this once metrics get fixed
    """
    # Verify token usage metrics in response
    metrics = mock_otlp_collector.get_metrics()

    assert metrics
    for metric in metrics:
        assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
        assert metric.unit == "tokens"
        assert metric.data.data_points and len(metric.data.data_points) == 1
        match metric.name:
            case "completion_tokens":
                assert metric.data.data_points[0].value == usage["completion_tokens"]
            case "total_tokens":
                assert metric.data.data_points[0].value == usage["total_tokens"]
            case "prompt_tokens":
                assert metric.data.data_points[0].value == usage["prompt_tokens"]
    """
50 tests/unit/distribution/test_stack_list_deps.py Normal file
@@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import argparse
from io import StringIO
from unittest.mock import patch

from llama_stack.cli.stack._list_deps import (
    run_stack_list_deps_command,
)


def test_stack_list_deps_basic():
    args = argparse.Namespace(
        config=None,
        env_name="test-env",
        providers="inference=remote::ollama",
        format="deps-only",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    # deps-only format should NOT include "uv pip install" or "Dependencies for"
    assert "uv pip install" not in output
    assert "Dependencies for" not in output

    # Check that expected dependencies are present
    assert "ollama" in output
    assert "aiohttp" in output
    assert "fastapi" in output


def test_stack_list_deps_with_distro_uv():
    args = argparse.Namespace(
        config="starter",
        env_name=None,
        providers=None,
        format="uv",
    )

    with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
        run_stack_list_deps_command(args)
        output = mock_stdout.getvalue()

    assert "uv pip install" in output
38 uv.lock generated
@@ -4129,27 +4129,27 @@ wheels = [

[[package]]
name = "ruff"
version = "0.12.5"
version = "0.9.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/30/cd/01015eb5034605fd98d829c5839ec2c6b4582b479707f7c1c2af861e8258/ruff-0.12.5.tar.gz", hash = "sha256:b209db6102b66f13625940b7f8c7d0f18e20039bb7f6101fbdac935c9612057e", size = 5170722, upload-time = "2025-07-24T13:26:37.456Z" }
sdist = { url = "https://files.pythonhosted.org/packages/20/8e/fafaa6f15c332e73425d9c44ada85360501045d5ab0b81400076aff27cf6/ruff-0.9.10.tar.gz", hash = "sha256:9bacb735d7bada9cfb0f2c227d3658fc443d90a727b47f206fb33f52f3c0eac7", size = 3759776, upload-time = "2025-03-07T15:27:44.363Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d4/de/ad2f68f0798ff15dd8c0bcc2889558970d9a685b3249565a937cd820ad34/ruff-0.12.5-py3-none-linux_armv6l.whl", hash = "sha256:1de2c887e9dec6cb31fcb9948299de5b2db38144e66403b9660c9548a67abd92", size = 11819133, upload-time = "2025-07-24T13:25:56.369Z" },
{ url = "https://files.pythonhosted.org/packages/f8/fc/c6b65cd0e7fbe60f17e7ad619dca796aa49fbca34bb9bea5f8faf1ec2643/ruff-0.12.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:d1ab65e7d8152f519e7dea4de892317c9da7a108da1c56b6a3c1d5e7cf4c5e9a", size = 12501114, upload-time = "2025-07-24T13:25:59.471Z" },
{ url = "https://files.pythonhosted.org/packages/c5/de/c6bec1dce5ead9f9e6a946ea15e8d698c35f19edc508289d70a577921b30/ruff-0.12.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:962775ed5b27c7aa3fdc0d8f4d4433deae7659ef99ea20f783d666e77338b8cf", size = 11716873, upload-time = "2025-07-24T13:26:01.496Z" },
{ url = "https://files.pythonhosted.org/packages/a1/16/cf372d2ebe91e4eb5b82a2275c3acfa879e0566a7ac94d331ea37b765ac8/ruff-0.12.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b4cae449597e7195a49eb1cdca89fd9fbb16140c7579899e87f4c85bf82f73", size = 11958829, upload-time = "2025-07-24T13:26:03.721Z" },
{ url = "https://files.pythonhosted.org/packages/25/bf/cd07e8f6a3a6ec746c62556b4c4b79eeb9b0328b362bb8431b7b8afd3856/ruff-0.12.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b13489c3dc50de5e2d40110c0cce371e00186b880842e245186ca862bf9a1ac", size = 11626619, upload-time = "2025-07-24T13:26:06.118Z" },
{ url = "https://files.pythonhosted.org/packages/d8/c9/c2ccb3b8cbb5661ffda6925f81a13edbb786e623876141b04919d1128370/ruff-0.12.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1504fea81461cf4841778b3ef0a078757602a3b3ea4b008feb1308cb3f23e08", size = 13221894, upload-time = "2025-07-24T13:26:08.292Z" },
{ url = "https://files.pythonhosted.org/packages/6b/58/68a5be2c8e5590ecdad922b2bcd5583af19ba648f7648f95c51c3c1eca81/ruff-0.12.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:c7da4129016ae26c32dfcbd5b671fe652b5ab7fc40095d80dcff78175e7eddd4", size = 14163909, upload-time = "2025-07-24T13:26:10.474Z" },
{ url = "https://files.pythonhosted.org/packages/bd/d1/ef6b19622009ba8386fdb792c0743f709cf917b0b2f1400589cbe4739a33/ruff-0.12.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca972c80f7ebcfd8af75a0f18b17c42d9f1ef203d163669150453f50ca98ab7b", size = 13583652, upload-time = "2025-07-24T13:26:13.381Z" },
{ url = "https://files.pythonhosted.org/packages/62/e3/1c98c566fe6809a0c83751d825a03727f242cdbe0d142c9e292725585521/ruff-0.12.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dbbf9f25dfb501f4237ae7501d6364b76a01341c6f1b2cd6764fe449124bb2a", size = 12700451, upload-time = "2025-07-24T13:26:15.488Z" },
{ url = "https://files.pythonhosted.org/packages/24/ff/96058f6506aac0fbc0d0fc0d60b0d0bd746240a0594657a2d94ad28033ba/ruff-0.12.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c47dea6ae39421851685141ba9734767f960113d51e83fd7bb9958d5be8763a", size = 12937465, upload-time = "2025-07-24T13:26:17.808Z" },
{ url = "https://files.pythonhosted.org/packages/eb/d3/68bc5e7ab96c94b3589d1789f2dd6dd4b27b263310019529ac9be1e8f31b/ruff-0.12.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5076aa0e61e30f848846f0265c873c249d4b558105b221be1828f9f79903dc5", size = 11771136, upload-time = "2025-07-24T13:26:20.422Z" },
{ url = "https://files.pythonhosted.org/packages/52/75/7356af30a14584981cabfefcf6106dea98cec9a7af4acb5daaf4b114845f/ruff-0.12.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a5a4c7830dadd3d8c39b1cc85386e2c1e62344f20766be6f173c22fb5f72f293", size = 11601644, upload-time = "2025-07-24T13:26:22.928Z" },
{ url = "https://files.pythonhosted.org/packages/c2/67/91c71d27205871737cae11025ee2b098f512104e26ffd8656fd93d0ada0a/ruff-0.12.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:46699f73c2b5b137b9dc0fc1a190b43e35b008b398c6066ea1350cce6326adcb", size = 12478068, upload-time = "2025-07-24T13:26:26.134Z" },
{ url = "https://files.pythonhosted.org/packages/34/04/b6b00383cf2f48e8e78e14eb258942fdf2a9bf0287fbf5cdd398b749193a/ruff-0.12.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5a655a0a0d396f0f072faafc18ebd59adde8ca85fb848dc1b0d9f024b9c4d3bb", size = 12991537, upload-time = "2025-07-24T13:26:28.533Z" },
{ url = "https://files.pythonhosted.org/packages/3e/b9/053d6445dc7544fb6594785056d8ece61daae7214859ada4a152ad56b6e0/ruff-0.12.5-py3-none-win32.whl", hash = "sha256:dfeb2627c459b0b78ca2bbdc38dd11cc9a0a88bf91db982058b26ce41714ffa9", size = 11751575, upload-time = "2025-07-24T13:26:30.835Z" },
{ url = "https://files.pythonhosted.org/packages/bc/0f/ab16e8259493137598b9149734fec2e06fdeda9837e6f634f5c4e35916da/ruff-0.12.5-py3-none-win_amd64.whl", hash = "sha256:ae0d90cf5f49466c954991b9d8b953bd093c32c27608e409ae3564c63c5306a5", size = 12882273, upload-time = "2025-07-24T13:26:32.929Z" },
{ url = "https://files.pythonhosted.org/packages/00/db/c376b0661c24cf770cb8815268190668ec1330eba8374a126ceef8c72d55/ruff-0.12.5-py3-none-win_arm64.whl", hash = "sha256:48cdbfc633de2c5c37d9f090ba3b352d1576b0015bfc3bc98eaf230275b7e805", size = 11951564, upload-time = "2025-07-24T13:26:34.994Z" },
{ url = "https://files.pythonhosted.org/packages/73/b2/af7c2cc9e438cbc19fafeec4f20bfcd72165460fe75b2b6e9a0958c8c62b/ruff-0.9.10-py3-none-linux_armv6l.whl", hash = "sha256:eb4d25532cfd9fe461acc83498361ec2e2252795b4f40b17e80692814329e42d", size = 10049494, upload-time = "2025-03-07T15:26:51.268Z" },
{ url = "https://files.pythonhosted.org/packages/6d/12/03f6dfa1b95ddd47e6969f0225d60d9d7437c91938a310835feb27927ca0/ruff-0.9.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:188a6638dab1aa9bb6228a7302387b2c9954e455fb25d6b4470cb0641d16759d", size = 10853584, upload-time = "2025-03-07T15:26:56.104Z" },
{ url = "https://files.pythonhosted.org/packages/02/49/1c79e0906b6ff551fb0894168763f705bf980864739572b2815ecd3c9df0/ruff-0.9.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5284dcac6b9dbc2fcb71fdfc26a217b2ca4ede6ccd57476f52a587451ebe450d", size = 10155692, upload-time = "2025-03-07T15:27:01.385Z" },
{ url = "https://files.pythonhosted.org/packages/5b/01/85e8082e41585e0e1ceb11e41c054e9e36fed45f4b210991052d8a75089f/ruff-0.9.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47678f39fa2a3da62724851107f438c8229a3470f533894b5568a39b40029c0c", size = 10369760, upload-time = "2025-03-07T15:27:04.023Z" },
{ url = "https://files.pythonhosted.org/packages/a1/90/0bc60bd4e5db051f12445046d0c85cc2c617095c0904f1aa81067dc64aea/ruff-0.9.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:99713a6e2766b7a17147b309e8c915b32b07a25c9efd12ada79f217c9c778b3e", size = 9912196, upload-time = "2025-03-07T15:27:06.93Z" },
{ url = "https://files.pythonhosted.org/packages/66/ea/0b7e8c42b1ec608033c4d5a02939c82097ddcb0b3e393e4238584b7054ab/ruff-0.9.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524ee184d92f7c7304aa568e2db20f50c32d1d0caa235d8ddf10497566ea1a12", size = 11434985, upload-time = "2025-03-07T15:27:10.082Z" },
{ url = "https://files.pythonhosted.org/packages/d5/86/3171d1eff893db4f91755175a6e1163c5887be1f1e2f4f6c0c59527c2bfd/ruff-0.9.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:df92aeac30af821f9acf819fc01b4afc3dfb829d2782884f8739fb52a8119a16", size = 12155842, upload-time = "2025-03-07T15:27:12.727Z" },
{ url = "https://files.pythonhosted.org/packages/89/9e/700ca289f172a38eb0bca752056d0a42637fa17b81649b9331786cb791d7/ruff-0.9.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de42e4edc296f520bb84954eb992a07a0ec5a02fecb834498415908469854a52", size = 11613804, upload-time = "2025-03-07T15:27:15.944Z" },
{ url = "https://files.pythonhosted.org/packages/f2/92/648020b3b5db180f41a931a68b1c8575cca3e63cec86fd26807422a0dbad/ruff-0.9.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d257f95b65806104b6b1ffca0ea53f4ef98454036df65b1eda3693534813ecd1", size = 13823776, upload-time = "2025-03-07T15:27:18.996Z" },
{ url = "https://files.pythonhosted.org/packages/5e/a6/cc472161cd04d30a09d5c90698696b70c169eeba2c41030344194242db45/ruff-0.9.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60dec7201c0b10d6d11be00e8f2dbb6f40ef1828ee75ed739923799513db24c", size = 11302673, upload-time = "2025-03-07T15:27:21.655Z" },
{ url = "https://files.pythonhosted.org/packages/6c/db/d31c361c4025b1b9102b4d032c70a69adb9ee6fde093f6c3bf29f831c85c/ruff-0.9.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:d838b60007da7a39c046fcdd317293d10b845001f38bcb55ba766c3875b01e43", size = 10235358, upload-time = "2025-03-07T15:27:24.72Z" },
{ url = "https://files.pythonhosted.org/packages/d1/86/d6374e24a14d4d93ebe120f45edd82ad7dcf3ef999ffc92b197d81cdc2a5/ruff-0.9.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ccaf903108b899beb8e09a63ffae5869057ab649c1e9231c05ae354ebc62066c", size = 9886177, upload-time = "2025-03-07T15:27:27.282Z" },
{ url = "https://files.pythonhosted.org/packages/00/62/a61691f6eaaac1e945a1f3f59f1eea9a218513139d5b6c2b8f88b43b5b8f/ruff-0.9.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f9567d135265d46e59d62dc60c0bfad10e9a6822e231f5b24032dba5a55be6b5", size = 10864747, upload-time = "2025-03-07T15:27:30.637Z" },
{ url = "https://files.pythonhosted.org/packages/ee/94/2c7065e1d92a8a8a46d46d9c3cf07b0aa7e0a1e0153d74baa5e6620b4102/ruff-0.9.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5f202f0d93738c28a89f8ed9eaba01b7be339e5d8d642c994347eaa81c6d75b8", size = 11360441, upload-time = "2025-03-07T15:27:33.356Z" },
{ url = "https://files.pythonhosted.org/packages/a7/8f/1f545ea6f9fcd7bf4368551fb91d2064d8f0577b3079bb3f0ae5779fb773/ruff-0.9.10-py3-none-win32.whl", hash = "sha256:bfb834e87c916521ce46b1788fbb8484966e5113c02df216680102e9eb960029", size = 10247401, upload-time = "2025-03-07T15:27:35.994Z" },
{ url = "https://files.pythonhosted.org/packages/4f/18/fb703603ab108e5c165f52f5b86ee2aa9be43bb781703ec87c66a5f5d604/ruff-0.9.10-py3-none-win_amd64.whl", hash = "sha256:f2160eeef3031bf4b17df74e307d4c5fb689a6f3a26a2de3f7ef4044e3c484f1", size = 11366360, upload-time = "2025-03-07T15:27:38.66Z" },
{ url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892, upload-time = "2025-03-07T15:27:41.687Z" },
]

[[package]]