chore: rename templates to distributions (#3035)

As the title says: distributions are in, templates are out.

`llama stack build --template` --> `llama stack build --distro`. For
backward compatibility, the previous `--template` option is kept, but using it
now emits a deprecation warning.
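
For illustration, the renamed command looks like this (using the `starter` distribution as an example; any distribution name works the same way):

```bash
# Old spelling: still accepted, but now prints a deprecation warning
llama stack build --template starter --image-type venv

# New spelling
llama stack build --distro starter --image-type venv

# List the distributions that can be built (replaces --list-templates)
llama stack build --list-distros
```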

Updated `server.py` to remove the `config_or_template` backward compatibility
(the deprecated `--config` and `--template` flags), since it has been a couple
of releases since that change was introduced.
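
A rough sketch of the resulting server invocation, assuming the config file path or distribution name is now passed as the single positional argument:

```bash
# Before (removed): python -m llama_stack.core.server.server --config /app/run.yaml
# After: the run config (or a distribution name) is positional
python -m llama_stack.core.server.server /app/run.yaml
python -m llama_stack.core.server.server starter --port 8321
```
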
Ashwin Bharambe 2025-08-04 11:34:17 -07:00 committed by GitHub
parent 12f964437a
commit cc87995e2b
87 changed files with 263 additions and 330 deletions


@@ -12,7 +12,7 @@ on:
       - 'llama_stack/core/build.*'
       - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/templates/**'
+      - 'llama_stack/distributions/**'
       - 'pyproject.toml'

   pull_request:
@@ -22,7 +22,7 @@ on:
       - 'llama_stack/core/build.*'
       - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/templates/**'
+      - 'llama_stack/distributions/**'
       - 'pyproject.toml'

 concurrency:
@@ -33,23 +33,23 @@ jobs:
   generate-matrix:
     runs-on: ubuntu-latest
     outputs:
-      templates: ${{ steps.set-matrix.outputs.templates }}
+      distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - name: Generate Template List
+      - name: Generate Distribution List
         id: set-matrix
         run: |
-          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
-          echo "templates=$templates" >> "$GITHUB_OUTPUT"
+          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          echo "distros=$distros" >> "$GITHUB_OUTPUT"

   build:
     needs: generate-matrix
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
+        distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
         image-type: [venv, container]
       fail-fast: false # We want to run all jobs even if some fail
@@ -62,13 +62,13 @@ jobs:
       - name: Print build dependencies
         run: |
-          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+          uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only

       - name: Run Llama Stack Build
         run: |
           # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
           # LLAMA_STACK_DIR is set to the current directory so we are building from the source
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test

       - name: Print dependencies in the image
         if: matrix.image-type == 'venv'
@@ -99,16 +99,16 @@ jobs:
       - name: Build a single provider
         run: |
-          yq -i '.image_type = "container"' llama_stack/templates/ci-tests/build.yaml
-          yq -i '.image_name = "test"' llama_stack/templates/ci-tests/build.yaml
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
+          yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
+          yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml

       - name: Inspect the container image entrypoint
         run: |
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
@@ -122,27 +122,27 @@ jobs:
       - name: Install dependencies
         uses: ./.github/actions/setup-runner

-      - name: Pin template to UBI9 base
+      - name: Pin distribution to UBI9 base
         run: |
           yq -i '
           .image_type = "container" |
           .image_name = "ubi9-test" |
           .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/templates/ci-tests/build.yaml
+          ' llama_stack/distributions/ci-tests/build.yaml

       - name: Build dev container (UBI9)
         env:
           USE_COPY_NOT_MOUNT: "true"
           LLAMA_STACK_DIR: "."
         run: |
-          uv run llama stack build --config llama_stack/templates/ci-tests/build.yaml
+          uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml

       - name: Inspect UBI9 image
         run: |
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi


@@ -172,7 +172,7 @@ cd work/
 git clone https://github.com/meta-llama/llama-stack.git
 git clone https://github.com/meta-llama/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
+LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
 ```

 ### Updating distribution configurations


@@ -3,7 +3,7 @@ include llama_stack/models/llama/llama3/tokenizer.model
 include llama_stack/models/llama/llama4/tokenizer.model
 include llama_stack.core/*.sh
 include llama_stack/cli/scripts/*.sh
-include llama_stack/templates/*/*.yaml
+include llama_stack/distributions/*/*.yaml
 include llama_stack/providers/tests/test_cases/inference/*.json
 include llama_stack/models/llama/*/*.md
 include llama_stack/tests/integration/*.jpg


@@ -123,7 +123,7 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-"!uv run --with llama-stack llama stack build --template together --image-type venv \n",
+"!uv run --with llama-stack llama stack build --distro together --image-type venv \n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",


@@ -233,7 +233,7 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server \n",
-"!uv run --with llama-stack llama stack build --template meta-reference-gpu --image-type venv \n",
+"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv \n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",


@@ -223,7 +223,7 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server \n",
-"!uv run --with llama-stack llama stack build --template llama_api --image-type venv \n",
+"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv \n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",


@@ -37,7 +37,7 @@
 "\n",
 "To learn more about torchtune: https://github.com/pytorch/torchtune\n",
 "\n",
-"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/experimental-post-training) as the distribution template\n",
+"We will use [experimental-post-training](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/distributions/experimental-post-training) as the distribution template\n",
 "\n",
 "#### 0.0. Prerequisite: Have an OpenAI API key\n",
 "In this showcase, we will use [braintrust](https://www.braintrust.dev/) as scoring provider for eval and it uses OpenAI model as judge model for scoring. So, you need to get an API key from [OpenAI developer platform](https://platform.openai.com/docs/overview).\n",
@@ -2864,7 +2864,7 @@
 }
 ],
 "source": [
-"!llama stack build --template experimental-post-training --image-type venv --image-name __system__"
+"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
 ]
 },
 {


@@ -38,7 +38,7 @@
 "source": [
 "# NBVAL_SKIP\n",
 "!pip install -U llama-stack\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --template fireworks --image-type venv"
+"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
 ]
 },
 {


@@ -57,7 +57,7 @@
 "outputs": [],
 "source": [
 "# NBVAL_SKIP\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
+"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
 ]
 },
 {


@@ -92,7 +92,7 @@
 "metadata": {},
 "source": [
 "```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
+"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
 "```"
 ]
 },


@@ -81,7 +81,7 @@
 "metadata": {},
 "source": [
 "```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv\n",
+"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
 "```"
 ]
 },


@@ -145,7 +145,7 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-"!uv run --with llama-stack llama stack build --template starter --image-type venv\n",
+"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",


@@ -43,7 +43,7 @@ We have built-in functionality to run the supported open-benckmarks using llama-
 Spin up llama stack server with 'open-benchmark' template
 ```
-llama stack run llama_stack/templates/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/run.yaml
 ```


@@ -23,7 +23,7 @@ To use the HF SFTTrainer in your Llama Stack project, follow these steps:
 You can access the HuggingFace trainer via the `ollama` distribution:
 ```bash
-llama stack build --template starter --image-type venv
+llama stack build --distro starter --image-type venv
 llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml
 ```


@@ -97,7 +97,7 @@ To start the Llama Stack Playground, run the following commands:
 1. Start up the Llama Stack API server
 ```bash
-llama stack build --template together --image-type venv
+llama stack build --distro together --image-type venv
 llama stack run together
 ```


@@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla
 - Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
 - Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
 - Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
-- Update any distribution {repopath}`Templates::llama_stack/templates/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
+- Update any distribution {repopath}`Templates::llama_stack/distributions/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.

 Here are some example PRs to help you get started:
@@ -52,7 +52,7 @@ def get_base_url(self) -> str:
 ## Testing the Provider

-Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --template together`.
+Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.

 ### 1. Integration Testing


@@ -141,7 +141,7 @@ You may then pick a template to build your distribution with providers fitted to
 For example, to build a distribution with TGI as the inference provider, you can run:
 ```
-$ llama stack build --template starter
+$ llama stack build --distro starter
 ...
 You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
 ```
@@ -184,10 +184,10 @@ You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack
 :::{tab-item} Building from a pre-existing build config file
 - In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
-- The config file will be of contents like the ones in `llama_stack/templates/*build.yaml`.
+- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
 ```
-llama stack build --config llama_stack/templates/starter/build.yaml
+llama stack build --config llama_stack/distributions/starter/build.yaml
 ```
 :::
@@ -253,11 +253,11 @@ Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podm
 To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
 ```
-llama stack build --template starter --image-type container
+llama stack build --distro starter --image-type container
 ```
 ```
-$ llama stack build --template starter --image-type container
+$ llama stack build --distro starter --image-type container
 ...
 Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/Containerfile
 FROM python:3.10-slim
 ...


@@ -6,7 +6,7 @@ This avoids the overhead of setting up a server.
 ```bash
 # setup
 uv pip install llama-stack
-llama stack build --template starter --image-type venv
+llama stack build --distro starter --image-type venv
 ```

 ```python


@@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
 python -m venv stack-fireworks
 source stack-fireworks/bin/activate # On Windows: stack-fireworks\Scripts\activate
 pip install --no-cache llama-stack==0.2.2
-llama stack build --template fireworks --image-type venv
+llama stack build --distro fireworks --image-type venv
 export FIREWORKS_API_KEY=<SOME_KEY>
 llama stack run fireworks --port 5050
 ```


@@ -153,7 +153,7 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-dell \
   --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
@@ -169,7 +169,7 @@ docker run \
 Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.

 ```bash
-llama stack build --template dell --image-type venv
+llama stack build --distro dell --image-type venv
 llama stack run dell
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \


@@ -109,7 +109,7 @@ docker run \
 Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.

 ```bash
-llama stack build --template meta-reference-gpu --image-type venv
+llama stack build --distro meta-reference-gpu --image-type venv
 llama stack run distributions/meta-reference-gpu/run.yaml \
   --port 8321 \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct


@@ -158,7 +158,7 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
-llama stack build --template nvidia --image-type venv
+llama stack build --distro nvidia --image-type venv
 llama stack run ./run.yaml \
   --port 8321 \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY \


@@ -169,7 +169,7 @@ docker run \
 Ensure you have configured the starter distribution using the environment variables explained above.

 ```bash
-uv run --with llama-stack llama stack build --template starter --image-type venv --run
+uv run --with llama-stack llama stack build --distro starter --image-type venv --run
 ```

 ## Example Usage


@@ -59,7 +59,7 @@ Now let's build and run the Llama Stack config for Ollama.
 We use `starter` as template. By default all providers are disabled, this requires enable ollama by passing environment variables.

 ```bash
-llama stack build --template starter --image-type venv --run
+llama stack build --distro starter --image-type venv --run
 ```
 :::
 :::{tab-item} Using `venv`
@@ -70,7 +70,7 @@ which defines the providers and their settings.
 Now let's build and run the Llama Stack config for Ollama.

 ```bash
-llama stack build --template starter --image-type venv --run
+llama stack build --distro starter --image-type venv --run
 ```
 :::
 :::{tab-item} Using a Container


@@ -22,7 +22,7 @@ ollama run llama3.2:3b --keepalive 60m
 We will use `uv` to run the Llama Stack server.
 ```bash
 OLLAMA_URL=http://localhost:11434 \
-  uv run --with llama-stack llama stack build --template starter --image-type venv --run
+  uv run --with llama-stack llama stack build --distro starter --image-type venv --run
 ```
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.


@@ -366,7 +366,7 @@ The purpose of scoring function is to calculate the score for each example based
 Firstly, you can see if the existing [llama stack scoring functions](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/scoring) can fulfill your need. If not, you need to write a new scoring function based on what benchmark author / other open source repo describe.

 ### Add new benchmark into template
-Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/open-benchmark/run.yaml)
+Firstly, you need to add the evaluation dataset associated with your benchmark under `datasets` resource in the [open-benchmark](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distributions/open-benchmark/run.yaml)

 Secondly, you need to add the new benchmark you just created under the `benchmarks` resource in the same template. To add the new benchmark, you need to have
 - `benchmark_id`: identifier of the benchmark
@@ -378,7 +378,7 @@ Secondly, you need to add the new benchmark you just created under the `benchmar
 Spin up llama stack server with 'open-benchmark' templates
 ```
-llama stack run llama_stack/templates/open-benchmark/run.yaml
+llama stack run llama_stack/distributions/open-benchmark/run.yaml
 ```


@@ -76,7 +76,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 1. **Build the Llama Stack**:
    Build the Llama Stack using the `starter` template:
    ```bash
-   uv run --with llama-stack llama stack build --template starter --image-type venv
+   uv run --with llama-stack llama stack build --distro starter --image-type venv
    ```
    **Expected Output:**
    ```bash


@@ -46,25 +46,25 @@ from llama_stack.core.utils.exec import formulate_run_args, run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api

-TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
+DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"


 @lru_cache
-def available_templates_specs() -> dict[str, BuildConfig]:
+def available_distros_specs() -> dict[str, BuildConfig]:
     import yaml

-    template_specs = {}
-    for p in TEMPLATES_PATH.rglob("*build.yaml"):
-        template_name = p.parent.name
+    distro_specs = {}
+    for p in DISTRIBS_PATH.rglob("*build.yaml"):
+        distro_name = p.parent.name
         with open(p) as f:
             build_config = BuildConfig(**yaml.safe_load(f))
-            template_specs[template_name] = build_config
-    return template_specs
+            distro_specs[distro_name] = build_config
+    return distro_specs


 def run_stack_build_command(args: argparse.Namespace) -> None:
-    if args.list_templates:
-        return _run_template_list_cmd()
+    if args.list_distros:
+        return _run_distro_list_cmd()

     if args.image_type == ImageType.VENV.value:
         current_venv = os.environ.get("VIRTUAL_ENV")
@@ -73,20 +73,30 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     image_name = args.image_name

     if args.template:
-        available_templates = available_templates_specs()
-        if args.template not in available_templates:
+        cprint(
+            "The --template argument is deprecated. Please use --distro instead.",
+            color="red",
+            file=sys.stderr,
+        )
+        distro_name = args.template
+    else:
+        distro_name = args.distribution
+
+    if distro_name:
+        available_distros = available_distros_specs()
+        if distro_name not in available_distros:
             cprint(
-                f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates",
+                f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions",
                 color="red",
                 file=sys.stderr,
             )
             sys.exit(1)
-        build_config = available_templates[args.template]
+        build_config = available_distros[distro_name]
         if args.image_type:
             build_config.image_type = args.image_type
         else:
             cprint(
-                f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {args.template}",
+                f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}",
                 color="red",
                 file=sys.stderr,
             )
@@ -136,7 +146,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                 sys.exit(1)

         build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
-    elif not args.config and not args.template:
+    elif not args.config and not distro_name:
         name = prompt(
             "> Enter a name for your Llama Stack (e.g. my-local-stack): ",
             validator=Validator.from_callable(
@@ -218,7 +228,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
         sys.exit(1)

     if args.print_deps_only:
-        print(f"# Dependencies for {args.template or args.config or image_name}")
+        print(f"# Dependencies for {distro_name or args.config or image_name}")
         normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
         normal_deps += SERVER_DEPENDENCIES
         print(f"uv pip install {' '.join(normal_deps)}")
@@ -233,7 +243,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             build_config,
             image_name=image_name,
             config_path=args.config,
-            template_name=args.template,
+            distro_name=distro_name,
         )
     except (Exception, RuntimeError) as exc:
@@ -344,13 +354,13 @@ def _generate_run_config(
 def _run_stack_build_command_from_build_config(
     build_config: BuildConfig,
     image_name: str | None = None,
-    template_name: str | None = None,
+    distro_name: str | None = None,
     config_path: str | None = None,
 ) -> Path | Traversable:
     image_name = image_name or build_config.image_name
     if build_config.image_type == LlamaStackImageType.CONTAINER.value:
-        if template_name:
-            image_name = f"distribution-{template_name}"
+        if distro_name:
+            image_name = f"distribution-{distro_name}"
         else:
             if not image_name:
                 raise ValueError("Please specify an image name when building a container image without a template")
@@ -364,9 +374,9 @@ def _run_stack_build_command_from_build_config(
         if image_name is None:
             raise ValueError("image_name should not be None after validation")

-    if template_name:
-        build_dir = DISTRIBS_BASE_DIR / template_name
-        build_file_path = build_dir / f"{template_name}-build.yaml"
+    if distro_name:
+        build_dir = DISTRIBS_BASE_DIR / distro_name
+        build_file_path = build_dir / f"{distro_name}-build.yaml"
     else:
         if image_name is None:
             raise ValueError("image_name cannot be None")
@@ -377,7 +387,7 @@ def _run_stack_build_command_from_build_config(
     run_config_file = None
     # Generate the run.yaml so it can be included in the container image with the proper entrypoint
     # Only do this if we're building a container image and we're not using a template
-    if build_config.image_type == LlamaStackImageType.CONTAINER.value and not template_name and config_path:
+    if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path:
         cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
         run_config_file = _generate_run_config(build_config, build_dir, image_name)
@@ -411,46 +421,45 @@ def _run_stack_build_command_from_build_config(
         return_code = build_image(
             build_config,
             image_name,
-            template_or_config=template_name or config_path or str(build_file_path),
+            distro_or_config=distro_name or config_path or str(build_file_path),
             run_config=run_config_file.as_posix() if run_config_file else None,
         )
         if return_code != 0:
             raise RuntimeError(f"Failed to build image {image_name}")

-    if template_name:
-        # copy run.yaml from template to build_dir instead of generating it again
-        template_path = importlib.resources.files("llama_stack") / f"templates/{template_name}/run.yaml"
-        run_config_file = build_dir / f"{template_name}-run.yaml"
-        with importlib.resources.as_file(template_path) as path:
+    if distro_name:
+        # copy run.yaml from distribution to build_dir instead of generating it again
+        distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml"
+        run_config_file = build_dir / f"{distro_name}-run.yaml"
+        with importlib.resources.as_file(distro_path) as path:
             shutil.copy(path, run_config_file)

         cprint("Build Successful!", color="green", file=sys.stderr)
-        cprint(f"You can find the newly-built template here: {run_config_file}", color="blue", file=sys.stderr)
+        cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
         cprint(
             "You can run the new Llama Stack distro via: "
             + colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
             color="green",
             file=sys.stderr,
         )
-        return template_path
+        return distro_path
     else:
         return _generate_run_config(build_config, build_dir, image_name)


-def _run_template_list_cmd() -> None:
-    # eventually, this should query a registry at llama.meta.com/llamastack/distributions
+def _run_distro_list_cmd() -> None:
     headers = [
-        "Template Name",
+        "Distribution Name",
         # "Providers",
         "Description",
     ]

     rows = []
-    for template_name, spec in available_templates_specs().items():
+    for distro_name, spec in available_distros_specs().items():
         rows.append(
             [
-                template_name,
+                distro_name,
                 # json.dumps(spec.distribution_spec.providers, indent=2),
                 spec.distribution_spec.description,
             ]


@@ -34,14 +34,24 @@ class StackBuild(Subcommand):
             "--template",
             type=str,
             default=None,
-            help="Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates",
+            help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
+        )
+        self.parser.add_argument(
+            "--distro",
+            "--distribution",
+            dest="distribution",
+            type=str,
+            default=None,
+            help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
         )
         self.parser.add_argument(
-            "--list-templates",
+            "--list-distros",
+            "--list-distributions",
             action="store_true",
+            dest="list_distros",
             default=False,
-            help="Show the available templates for building a Llama Stack distribution",
+            help="Show the available distributions for building a Llama Stack distribution",
         )
         self.parser.add_argument(


@@ -35,8 +35,8 @@ class StackRun(Subcommand):
             "config",
             type=str,
             nargs="?", # Make it optional
-            metavar="config | template",
-            help="Path to config file to use for the run or name of known template (`llama stack list` for a list).",
+            metavar="config | distro",
+            help="Path to config file to use for the run or name of known distro (`llama stack list` for a list).",
         )
         self.parser.add_argument(
             "--port",
@@ -68,22 +68,22 @@ class StackRun(Subcommand):
             help="Start the UI server",
         )

-    def _resolve_config_and_template(self, args: argparse.Namespace) -> tuple[Path | None, str | None]:
-        """Resolve config file path and template name from args.config"""
-        from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
+    def _resolve_config_and_distro(self, args: argparse.Namespace) -> tuple[Path | None, str | None]:
+        """Resolve config file path and distribution name from args.config"""
+        from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR

         if not args.config:
             return None, None

         config_file = Path(args.config)
         has_yaml_suffix = args.config.endswith(".yaml")
-        template_name = None
+        distro_name = None

         if not config_file.exists() and not has_yaml_suffix:
-            # check if this is a template
-            config_file = Path(REPO_ROOT) / "llama_stack" / "templates" / args.config / "run.yaml"
+            # check if this is a distribution
+            config_file = Path(REPO_ROOT) / "llama_stack" / "distributions" / args.config / "run.yaml"
             if config_file.exists():
-                template_name = args.config
+                distro_name = args.config

         if not config_file.exists() and not has_yaml_suffix:
             # check if it's a build config saved to ~/.llama dir
@@ -99,7 +99,7 @@ class StackRun(Subcommand):
                 f"Config file must be a valid file path, '{config_file}' is not a file: type={type(config_file)}"
             )

-        return config_file, template_name
+        return config_file, distro_name

     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import yaml
@@ -113,9 +113,9 @@ class StackRun(Subcommand):
         if args.config:
             try:
-                from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
+                from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro

-                config_file = resolve_config_or_template(args.config, Mode.RUN)
+                config_file = resolve_config_or_distro(args.config, Mode.RUN)
             except ValueError as e:
                 self.parser.error(str(e))
         else:


@@ -11,38 +11,19 @@ from llama_stack.log import get_logger

 logger = get_logger(name=__name__, category="cli")


-def add_config_template_args(parser: argparse.ArgumentParser):
-    """Add unified config/template arguments with backward compatibility."""
+# TODO: this can probably just be inlined now?
+def add_config_distro_args(parser: argparse.ArgumentParser):
+    """Add unified config/distro arguments."""
     group = parser.add_mutually_exclusive_group(required=True)

     group.add_argument(
         "config",
         nargs="?",
-        help="Configuration file path or template name",
+        help="Configuration file path or distribution name",
     )
-
-    # Backward compatibility arguments (deprecated)
-    group.add_argument(
-        "--config",
-        dest="config_deprecated",
-        help="(DEPRECATED) Use positional argument [config] instead. Configuration file path",
-    )
-
-    group.add_argument(
-        "--template",
-        dest="template_deprecated",
-        help="(DEPRECATED) Use positional argument [config] instead. Template name",
-    )


 def get_config_from_args(args: argparse.Namespace) -> str | None:
-    """Extract config value from parsed arguments, handling both new and deprecated forms."""
     if args.config is not None:
         return str(args.config)
-    elif hasattr(args, "config_deprecated") and args.config_deprecated is not None:
-        logger.warning("Using deprecated --config argument. Use positional argument [config] instead.")
-        return str(args.config_deprecated)
-    elif hasattr(args, "template_deprecated") and args.template_deprecated is not None:
-        logger.warning("Using deprecated --template argument. Use positional argument [config] instead.")
-        return str(args.template_deprecated)

     return None


@@ -16,8 +16,8 @@ from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.external import load_external_apis
 from llama_stack.core.utils.exec import run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
+from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.providers.datatypes import Api
-from llama_stack.templates.template import DistributionTemplate

 log = logging.getLogger(__name__)
@@ -106,7 +106,7 @@ def print_pip_install_help(config: BuildConfig):
 def build_image(
     build_config: BuildConfig,
     image_name: str,
-    template_or_config: str,
+    distro_or_config: str,
     run_config: str | None = None,
 ):
     container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
@@ -123,8 +123,8 @@ def build_image(
     script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
     args = [
         script,
-        "--template-or-config",
-        template_or_config,
+        "--distro-or-config",
+        distro_or_config,
         "--image-name",
         image_name,
         "--container-base",


@@ -43,7 +43,7 @@ normal_deps=""
 external_provider_deps=""
 optional_deps=""
 run_config=""
-template_or_config=""
+distro_or_config=""

 while [[ $# -gt 0 ]]; do
   key="$1"
@@ -96,12 +96,12 @@ while [[ $# -gt 0 ]]; do
       run_config="$2"
       shift 2
       ;;
-    --template-or-config)
+    --distro-or-config)
       if [[ -z "$2" || "$2" == --* ]]; then
-        echo "Error: --template-or-config requires a string value" >&2
+        echo "Error: --distro-or-config requires a string value" >&2
        usage
       fi
-      template_or_config="$2"
+      distro_or_config="$2"
       shift 2
       ;;
     *)
@@ -327,12 +327,11 @@ EOF
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$RUN_CONFIG_PATH"]
 EOF
-# If a template is provided (not a yaml file), we use the --template flag
-elif [[ "$template_or_config" != *.yaml ]]; then
+elif [[ "$distro_or_config" != *.yaml ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "$distro_or_config"]
 EOF
 fi


@@ -42,7 +42,7 @@ from llama_stack.core.resolver import ProviderRegistry
 from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
 from llama_stack.core.stack import (
     construct_stack,
-    get_stack_run_config_from_template,
+    get_stack_run_config_from_distro,
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
@@ -138,14 +138,14 @@ class LibraryClientHttpxResponse:
 class LlamaStackAsLibraryClient(LlamaStackClient):
     def __init__(
         self,
-        config_path_or_template_name: str,
+        config_path_or_distro_name: str,
         skip_logger_removal: bool = False,
         custom_provider_registry: ProviderRegistry | None = None,
         provider_data: dict[str, Any] | None = None,
     ):
         super().__init__()
         self.async_client = AsyncLlamaStackAsLibraryClient(
-            config_path_or_template_name, custom_provider_registry, provider_data
+            config_path_or_distro_name, custom_provider_registry, provider_data
         )
         self.pool_executor = ThreadPoolExecutor(max_workers=4)
         self.skip_logger_removal = skip_logger_removal
@@ -212,7 +212,7 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
 class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
     def __init__(
         self,
-        config_path_or_template_name: str,
+        config_path_or_distro_name: str,
         custom_provider_registry: ProviderRegistry | None = None,
         provider_data: dict[str, Any] | None = None,
     ):
@@ -222,17 +222,17 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",")
             os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")

-        if config_path_or_template_name.endswith(".yaml"):
-            config_path = Path(config_path_or_template_name)
+        if config_path_or_distro_name.endswith(".yaml"):
+            config_path = Path(config_path_or_distro_name)
             if not config_path.exists():
                 raise ValueError(f"Config file {config_path} does not exist")
             config_dict = replace_env_vars(yaml.safe_load(config_path.read_text()))
             config = parse_and_maybe_upgrade_config(config_dict)
         else:
-            # template
-            config = get_stack_run_config_from_template(config_path_or_template_name)
+            # distribution
+            config = get_stack_run_config_from_distro(config_path_or_distro_name)

-        self.config_path_or_template_name = config_path_or_template_name
+        self.config_path_or_distro_name = config_path_or_distro_name
         self.config = config
         self.custom_provider_registry = custom_provider_registry
         self.provider_data = provider_data
@@ -245,11 +245,11 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         except ModuleNotFoundError as _e:
             cprint(_e.msg, color="red", file=sys.stderr)
             cprint(
-                "Using llama-stack as a library requires installing dependencies depending on the template (providers) you choose.\n",
+                "Using llama-stack as a library requires installing dependencies depending on the distribution (providers) you choose.\n",
                 color="yellow",
                 file=sys.stderr,
             )
-            if self.config_path_or_template_name.endswith(".yaml"):
+            if self.config_path_or_distro_name.endswith(".yaml"):
                 providers: dict[str, list[BuildProvider]] = {}
                 for api, run_providers in self.config.providers.items():
                     for provider in run_providers:
@@ -267,7 +267,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             else:
                 prefix = "!" if in_notebook() else ""
                 cprint(
-                    f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
+                    f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n",
                     "yellow",
                     file=sys.stderr,
                 )
@@ -283,7 +283,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
-            console.print(f"Using config [blue]{self.config_path_or_template_name}[/blue]:")
+            console.print(f"Using config [blue]{self.config_path_or_distro_name}[/blue]:")
             safe_config = redact_sensitive_fields(self.config.model_dump())
             console.print(yaml.dump(safe_config, indent=2))


@@ -32,7 +32,7 @@ from openai import BadRequestError
 from pydantic import BaseModel, ValidationError

 from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.cli.utils import add_config_template_args, get_config_from_args
+from llama_stack.cli.utils import add_config_distro_args, get_config_from_args
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
@@ -60,7 +60,7 @@ from llama_stack.core.stack import (
     validate_env_pair,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
-from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
+from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
@@ -377,7 +377,7 @@ def main(args: argparse.Namespace | None = None):
     """Start the LlamaStack server."""
     parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
-    add_config_template_args(parser)
+    add_config_distro_args(parser)
     parser.add_argument(
         "--port",
         type=int,
@@ -396,8 +396,8 @@
     if args is None:
         args = parser.parse_args()

-    config_or_template = get_config_from_args(args)
-    config_file = resolve_config_or_template(config_or_template, Mode.RUN)
+    config_or_distro = get_config_from_args(args)
+    config_file = resolve_config_or_distro(config_or_distro, Mode.RUN)

     logger_config = None
     with open(config_file) as fp:


@@ -389,12 +389,12 @@ async def refresh_registry_task(impls: dict[Api, Any]):
         await asyncio.sleep(REGISTRY_REFRESH_INTERVAL_SECONDS)


-def get_stack_run_config_from_template(template: str) -> StackRunConfig:
-    template_path = importlib.resources.files("llama_stack") / f"templates/{template}/run.yaml"
+def get_stack_run_config_from_distro(distro: str) -> StackRunConfig:
+    distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro}/run.yaml"

-    with importlib.resources.as_file(template_path) as path:
+    with importlib.resources.as_file(distro_path) as path:
         if not path.exists():
-            raise ValueError(f"Template '{template}' not found at {template_path}")
+            raise ValueError(f"Distribution '{distro}' not found at {distro_path}")
         run_config = yaml.safe_load(path.open())

     return StackRunConfig(**replace_env_vars(run_config))

View file

@ -9,7 +9,7 @@
1. Start up Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html). 1. Start up Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
``` ```
llama stack build --template together --image-type venv llama stack build --distro together --image-type venv
llama stack run together llama stack run together
``` ```

View file

@ -13,7 +13,7 @@ from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="config_resolution") logger = get_logger(name=__name__, category="config_resolution")
TEMPLATE_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "templates" DISTRO_DIR = Path(__file__).parent.parent.parent.parent / "llama_stack" / "distributions"
class Mode(StrEnum): class Mode(StrEnum):
@ -21,15 +21,15 @@ class Mode(StrEnum):
BUILD = "build" BUILD = "build"
def resolve_config_or_template( def resolve_config_or_distro(
config_or_template: str, config_or_distro: str,
mode: Mode = Mode.RUN, mode: Mode = Mode.RUN,
) -> Path: ) -> Path:
""" """
Resolve a config/template argument to a concrete config file path. Resolve a config/distro argument to a concrete config file path.
Args: Args:
config_or_template: User input (file path, template name, or built distribution) config_or_distro: User input (file path, distribution name, or built distribution)
mode: Mode resolving for ("run", "build", "server") mode: Mode resolving for ("run", "build", "server")
Returns: Returns:
@ -40,86 +40,86 @@ def resolve_config_or_template(
""" """
# Strategy 1: Try as file path first # Strategy 1: Try as file path first
config_path = Path(config_or_template) config_path = Path(config_or_distro)
if config_path.exists() and config_path.is_file(): if config_path.exists() and config_path.is_file():
logger.info(f"Using file path: {config_path}") logger.info(f"Using file path: {config_path}")
return config_path.resolve() return config_path.resolve()
# Strategy 2: Try as template name (if no .yaml extension) # Strategy 2: Try as distribution name (if no .yaml extension)
if not config_or_template.endswith(".yaml"): if not config_or_distro.endswith(".yaml"):
template_config = _get_template_config_path(config_or_template, mode) distro_config = _get_distro_config_path(config_or_distro, mode)
if template_config.exists(): if distro_config.exists():
logger.info(f"Using template: {template_config}") logger.info(f"Using distribution: {distro_config}")
return template_config return distro_config
# Strategy 3: Try as built distribution name # Strategy 3: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_template}" / f"{config_or_template}-{mode}.yaml" distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists(): if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}") logger.info(f"Using built distribution: {distrib_config}")
return distrib_config return distrib_config
distrib_config = DISTRIBS_BASE_DIR / f"{config_or_template}" / f"{config_or_template}-{mode}.yaml" distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists(): if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}") logger.info(f"Using built distribution: {distrib_config}")
return distrib_config return distrib_config
# Strategy 4: Failed - provide helpful error # Strategy 4: Failed - provide helpful error
raise ValueError(_format_resolution_error(config_or_template, mode)) raise ValueError(_format_resolution_error(config_or_distro, mode))
def _get_template_config_path(template_name: str, mode: Mode) -> Path: def _get_distro_config_path(distro_name: str, mode: Mode) -> Path:
"""Get the config file path for a template.""" """Get the config file path for a distro."""
return TEMPLATE_DIR / template_name / f"{mode}.yaml" return DISTRO_DIR / distro_name / f"{mode}.yaml"
def _format_resolution_error(config_or_template: str, mode: Mode) -> str: def _format_resolution_error(config_or_distro: str, mode: Mode) -> str:
"""Format a helpful error message for resolution failures.""" """Format a helpful error message for resolution failures."""
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
template_path = _get_template_config_path(config_or_template, mode) distro_path = _get_distro_config_path(config_or_distro, mode)
distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_template}" / f"{config_or_template}-{mode}.yaml" distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_template}" / f"{config_or_template}-{mode}.yaml" distrib_path2 = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
available_templates = _get_available_templates() available_distros = _get_available_distros()
templates_str = ", ".join(available_templates) if available_templates else "none found" distros_str = ", ".join(available_distros) if available_distros else "none found"
return f"""Could not resolve config or template '{config_or_template}'. return f"""Could not resolve config or distribution '{config_or_distro}'.
Tried the following locations: Tried the following locations:
1. As file path: {Path(config_or_template).resolve()} 1. As file path: {Path(config_or_distro).resolve()}
2. As template: {template_path} 2. As distribution: {distro_path}
3. As built distribution: ({distrib_path}, {distrib_path2}) 3. As built distribution: ({distrib_path}, {distrib_path2})
Available templates: {templates_str} Available distributions: {distros_str}
Did you mean one of these templates? Did you mean one of these distributions?
{_format_template_suggestions(available_templates, config_or_template)} {_format_distro_suggestions(available_distros, config_or_distro)}
""" """
def _get_available_templates() -> list[str]: def _get_available_distros() -> list[str]:
"""Get list of available template names.""" """Get list of available distro names."""
if not TEMPLATE_DIR.exists() and not DISTRIBS_BASE_DIR.exists(): if not DISTRO_DIR.exists() and not DISTRIBS_BASE_DIR.exists():
return [] return []
return list( return list(
set( set(
[d.name for d in TEMPLATE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")] [d.name for d in DISTRO_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
+ [d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")] + [d.name for d in DISTRIBS_BASE_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")]
) )
) )
def _format_template_suggestions(templates: list[str], user_input: str) -> str: def _format_distro_suggestions(distros: list[str], user_input: str) -> str:
"""Format template suggestions for error messages, showing closest matches first.""" """Format distro suggestions for error messages, showing closest matches first."""
if not templates: if not distros:
return " (no templates found)" return " (no distros found)"
import difflib import difflib
# Get up to 3 closest matches with similarity threshold of 0.3 (lower = more permissive) # Get up to 3 closest matches with similarity threshold of 0.3 (lower = more permissive)
close_matches = difflib.get_close_matches(user_input, templates, n=3, cutoff=0.3) close_matches = difflib.get_close_matches(user_input, distros, n=3, cutoff=0.3)
display_templates = close_matches if close_matches else templates[:3] display_distros = close_matches if close_matches else distros[:3]
suggestions = [f" - {t}" for t in display_templates] suggestions = [f" - {d}" for d in display_distros]
return "\n".join(suggestions) return "\n".join(suggestions)

View file

@ -5,7 +5,7 @@
# the root directory of this source tree. # the root directory of this source tree.
from llama_stack.templates.template import DistributionTemplate from llama_stack.distributions.template import DistributionTemplate
from ..starter.starter import get_distribution_template as get_starter_distribution_template from ..starter.starter import get_distribution_template as get_starter_distribution_template

View file

@ -12,10 +12,10 @@ from llama_stack.core.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:

View file

@ -141,7 +141,7 @@ docker run \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \ -v $HOME/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
llamastack/distribution-{{ name }} \ llamastack/distribution-{{ name }} \
--config /root/my-run.yaml \ --config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \ --port $LLAMA_STACK_PORT \
@ -157,7 +157,7 @@ docker run \
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
```bash ```bash
llama stack build --template {{ name }} --image-type conda llama stack build --distro {{ name }} --image-type conda
llama stack run {{ name }} llama stack run {{ name }}
--port $LLAMA_STACK_PORT \ --port $LLAMA_STACK_PORT \
--env INFERENCE_MODEL=$INFERENCE_MODEL \ --env INFERENCE_MODEL=$INFERENCE_MODEL \

View file

@ -97,7 +97,7 @@ docker run \
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
```bash ```bash
llama stack build --template {{ name }} --image-type venv llama stack build --distro {{ name }} --image-type venv
llama stack run distributions/{{ name }}/run.yaml \ llama stack run distributions/{{ name }}/run.yaml \
--port 8321 \ --port 8321 \
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

View file

@ -14,6 +14,7 @@ from llama_stack.core.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.inline.inference.meta_reference import ( from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceInferenceConfig, MetaReferenceInferenceConfig,
) )
@ -21,7 +22,6 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:

View file

@ -130,7 +130,7 @@ If you've set up your local development environment, you can also build the imag
```bash ```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct
llama stack build --template nvidia --image-type venv llama stack build --distro nvidia --image-type venv
llama stack run ./run.yaml \ llama stack run ./run.yaml \
--port 8321 \ --port 8321 \
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \ --env NVIDIA_API_KEY=$NVIDIA_API_KEY \

View file

@ -7,12 +7,12 @@
from pathlib import Path from pathlib import Path
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:

View file

@ -16,6 +16,11 @@ from llama_stack.core.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)
from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
SQLiteVectorIOConfig, SQLiteVectorIOConfig,
) )
@ -29,11 +34,6 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig, PGVectorVectorIOConfig,
) )
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
get_model_registry,
)
def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]: def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:

View file

@ -13,15 +13,15 @@ from llama_stack.core.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
)
from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig from llama_stack.providers.inline.inference.sentence_transformers import SentenceTransformersInferenceConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
)
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:

View file

@ -15,6 +15,10 @@ from llama_stack.core.datatypes import (
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import (
DistributionTemplate,
RunConfigSettings,
)
from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.datatypes import RemoteProviderSpec
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
@ -33,10 +37,6 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig, PGVectorVectorIOConfig,
) )
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
)
def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]: def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:

View file

@ -8,12 +8,12 @@ from pathlib import Path
from llama_stack.apis.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:

View file

@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
Build the NVIDIA environment: Build the NVIDIA environment:
```bash ```bash
llama stack build --template nvidia --image-type venv llama stack build --distro nvidia --image-type venv
``` ```
### Basic Usage using the LlamaStack Python Client ### Basic Usage using the LlamaStack Python Client

View file

@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM.
Build the NVIDIA environment: Build the NVIDIA environment:
```bash ```bash
llama stack build --template nvidia --image-type venv llama stack build --distro nvidia --image-type venv
``` ```
### Basic Usage using the LlamaStack Python Client ### Basic Usage using the LlamaStack Python Client

View file

@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
Build the NVIDIA environment: Build the NVIDIA environment:
```bash ```bash
llama stack build --template nvidia --image-type venv llama stack build --distro nvidia --image-type venv
``` ```
### Basic Usage using the LlamaStack Python Client ### Basic Usage using the LlamaStack Python Client

View file

@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
Build the NVIDIA environment: Build the NVIDIA environment:
```bash ```bash
llama stack build --template nvidia --image-type venv llama stack build --distro nvidia --image-type venv
``` ```
### Basic Usage using the LlamaStack Python Client ### Basic Usage using the LlamaStack Python Client

View file

@ -327,10 +327,10 @@ exclude = [
"^llama_stack/strong_typing/inspection\\.py$", "^llama_stack/strong_typing/inspection\\.py$",
"^llama_stack/strong_typing/schema\\.py$", "^llama_stack/strong_typing/schema\\.py$",
"^llama_stack/strong_typing/serializer\\.py$", "^llama_stack/strong_typing/serializer\\.py$",
"^llama_stack/templates/groq/groq\\.py$", "^llama_stack/distributions/groq/groq\\.py$",
"^llama_stack/templates/llama_api/llama_api\\.py$", "^llama_stack/distributions/llama_api/llama_api\\.py$",
"^llama_stack/templates/sambanova/sambanova\\.py$", "^llama_stack/distributions/sambanova/sambanova\\.py$",
"^llama_stack/templates/template\\.py$", "^llama_stack/distributions/template\\.py$",
] ]
[[tool.mypy.overrides]] [[tool.mypy.overrides]]

View file

@ -34,39 +34,39 @@ class ChangedPathTracker:
return self._changed_paths return self._changed_paths
def find_template_dirs(templates_dir: Path) -> Iterable[Path]: def find_distro_dirs(distro_dir: Path) -> Iterable[Path]:
"""Find immediate subdirectories in the templates folder.""" """Find immediate subdirectories in the distributions folder."""
if not templates_dir.exists(): if not distro_dir.exists():
raise FileNotFoundError(f"Templates directory not found: {templates_dir}") raise FileNotFoundError(f"Distributions directory not found: {distro_dir}")
return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__") return sorted(d for d in distro_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
def process_template(template_dir: Path, progress, change_tracker: ChangedPathTracker) -> None: def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracker) -> None:
"""Process a single template directory.""" """Process a single distribution directory."""
progress.print(f"Processing {template_dir.name}") progress.print(f"Processing {distro_dir.name}")
try: try:
# Import the module directly # Import the module directly
module_name = f"llama_stack.templates.{template_dir.name}" module_name = f"llama_stack.distributions.{distro_dir.name}"
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
# Get and save the distribution template # Get and save the distribution template
if template_func := getattr(module, "get_distribution_template", None): if template_func := getattr(module, "get_distribution_template", None):
template = template_func() distro = template_func()
yaml_output_dir = REPO_ROOT / "llama_stack" / "templates" / template.name yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{template.distro_type}_distro" doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{distro.distro_type}_distro"
change_tracker.add_paths(yaml_output_dir, doc_output_dir) change_tracker.add_paths(yaml_output_dir, doc_output_dir)
template.save_distribution( distro.save_distribution(
yaml_output_dir=yaml_output_dir, yaml_output_dir=yaml_output_dir,
doc_output_dir=doc_output_dir, doc_output_dir=doc_output_dir,
) )
else: else:
progress.print(f"[yellow]Warning: {template_dir.name} has no get_distribution_template function") progress.print(f"[yellow]Warning: {distro_dir.name} has no get_distribution_template function")
except Exception as e: except Exception as e:
progress.print(f"[red]Error processing {template_dir.name}: {str(e)}") progress.print(f"[red]Error processing {distro_dir.name}: {str(e)}")
raise e raise e
@ -85,38 +85,38 @@ def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
return has_changes return has_changes
def pre_import_templates(template_dirs: list[Path]) -> None: def pre_import_distros(distro_dirs: list[Path]) -> None:
# Pre-import all template modules to avoid deadlocks. # Pre-import all distro modules to avoid deadlocks.
for template_dir in template_dirs: for distro_dir in distro_dirs:
module_name = f"llama_stack.templates.{template_dir.name}" module_name = f"llama_stack.distributions.{distro_dir.name}"
importlib.import_module(module_name) importlib.import_module(module_name)
def main(): def main():
templates_dir = REPO_ROOT / "llama_stack" / "templates" distros_dir = REPO_ROOT / "llama_stack" / "distributions"
change_tracker = ChangedPathTracker() change_tracker = ChangedPathTracker()
with Progress( with Progress(
SpinnerColumn(), SpinnerColumn(),
TextColumn("[progress.description]{task.description}"), TextColumn("[progress.description]{task.description}"),
) as progress: ) as progress:
template_dirs = list(find_template_dirs(templates_dir)) distro_dirs = list(find_distro_dirs(distros_dir))
task = progress.add_task("Processing distribution templates...", total=len(template_dirs)) task = progress.add_task("Processing distribution templates...", total=len(distro_dirs))
pre_import_templates(template_dirs) pre_import_distros(distro_dirs)
# Create a partial function with the progress bar # Create a partial function with the progress bar
process_func = partial(process_template, progress=progress, change_tracker=change_tracker) process_func = partial(process_distro, progress=progress, change_tracker=change_tracker)
# Process templates in parallel # Process distributions in parallel
with concurrent.futures.ThreadPoolExecutor() as executor: with concurrent.futures.ThreadPoolExecutor() as executor:
# Submit all tasks and wait for completion # Submit all tasks and wait for completion
list(executor.map(process_func, template_dirs)) list(executor.map(process_func, distro_dirs))
progress.update(task, advance=len(template_dirs)) progress.update(task, advance=len(distro_dirs))
if check_for_changes(change_tracker): if check_for_changes(change_tracker):
print( print(
"Distribution template changes detected. Please commit the changes.", "Distribution changes detected. Please commit the changes.",
file=sys.stderr, file=sys.stderr,
) )
sys.exit(1) sys.exit(1)
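A generic sketch of the pre-import-then-parallelize pattern used by the codegen script above; `run_codegen` and the `worker` callable are illustrative names, not part of the commit:

```python
import concurrent.futures
import importlib
from pathlib import Path


def run_codegen(distros_dir: Path, worker) -> None:
    distro_dirs = sorted(
        d for d in distros_dir.iterdir() if d.is_dir() and d.name != "__pycache__"
    )
    # Warm the module cache serially first; importing from many threads at once
    # is what the "avoid deadlocks" comment above is guarding against.
    for d in distro_dirs:
        importlib.import_module(f"llama_stack.distributions.{d.name}")
    # With imports already cached, the per-distro work can fan out safely.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        list(executor.map(worker, distro_dirs))
```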

View file

@ -1,66 +0,0 @@
#!/usr/bin/env python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import os
from pathlib import Path
import pytest
"""
Script for running api on AsyncLlamaStackAsLibraryClient with templates
Assuming directory structure:
- llama-stack
- scripts
- tests
- api
Example command:
cd llama-stack
EXPORT TOGETHER_API_KEY=<..>
EXPORT FIREWORKS_API_KEY=<..>
./scripts/run_client_sdk_tests.py --templates together fireworks --report
"""
REPO_ROOT = Path(__file__).parent.parent
CLIENT_SDK_TESTS_RELATIVE_PATH = "tests/api/"
def main(parser: argparse.ArgumentParser):
args = parser.parse_args()
templates_dir = REPO_ROOT / "llama_stack" / "templates"
user_specified_templates = [templates_dir / t for t in args.templates] if args.templates else []
for d in templates_dir.iterdir():
if d.is_dir() and d.name != "__pycache__":
template_configs = list(d.rglob("run.yaml"))
if len(template_configs) == 0:
continue
config = template_configs[0]
if user_specified_templates:
if not any(config.parent == t for t in user_specified_templates):
continue
os.environ["LLAMA_STACK_CONFIG"] = str(config)
pytest_args = "--report" if args.report else ""
pytest.main(
[
pytest_args,
"-s",
"-v",
str(REPO_ROOT / CLIENT_SDK_TESTS_RELATIVE_PATH),
]
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="llama_test",
)
parser.add_argument("--templates", nargs="+")
parser.add_argument("--report", action="store_true")
main(parser)

View file

@ -39,7 +39,7 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
def start_llama_stack_server(config_name: str) -> subprocess.Popen: def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config.""" """Start a llama stack server with the given config."""
cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run" cmd = f"uv run --with llama-stack llama stack build --distro {config_name} --image-type venv --run"
devnull = open(os.devnull, "w") devnull = open(os.devnull, "w")
process = subprocess.Popen( process = subprocess.Popen(
shlex.split(cmd), shlex.split(cmd),

View file

@ -16,8 +16,8 @@ from llama_stack.core.utils.image_types import LlamaStackImageType
def test_container_build_passes_path(monkeypatch, tmp_path): def test_container_build_passes_path(monkeypatch, tmp_path):
called_with = {} called_with = {}
def spy_build_image(build_config, image_name, template_or_config, run_config=None): def spy_build_image(build_config, image_name, distro_or_config, run_config=None):
called_with["path"] = template_or_config called_with["path"] = distro_or_config
called_with["run_config"] = run_config called_with["run_config"] = run_config
return 0 return 0