Merge remote-tracking branch 'origin/main' into stack-config-default-embed

This commit is contained in:
Ashwin Bharambe 2025-10-20 13:29:19 -07:00
commit 31249a1a75
237 changed files with 30895 additions and 15441 deletions

19
.dockerignore Normal file
View file

@@ -0,0 +1,19 @@
.venv
__pycache__
*.pyc
*.pyo
*.pyd
*.so
.git
.gitignore
htmlcov*
.coverage
coverage*
.cache
.mypy_cache
.pytest_cache
.ruff_cache
uv.lock
node_modules
build
/tmp

View file

@@ -82,11 +82,14 @@ runs:
          echo "No recording changes"
        fi
-   - name: Write inference logs to file
+   - name: Write docker logs to file
      if: ${{ always() }}
      shell: bash
      run: |
        sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
+       distro_name=$(echo "${{ inputs.stack-config }}" | sed 's/^docker://' | sed 's/^server://')
+       stack_container_name="llama-stack-test-$distro_name"
+       sudo docker logs $stack_container_name > docker-${distro_name}-${{ inputs.inference-mode }}.log || true
    - name: Upload logs
      if: ${{ always() }}
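To make the new log-collection step concrete, here is a small illustration (not part of the workflow) of how the container and log names are derived; the `docker:ci-tests` and `replay` values are assumed example inputs:

```bash
# Assumed example inputs for stack-config and inference-mode.
stack_config="docker:ci-tests"
inference_mode="replay"

# Same derivation as the workflow step above.
distro_name=$(echo "$stack_config" | sed 's/^docker://' | sed 's/^server://')
stack_container_name="llama-stack-test-$distro_name"

echo "$stack_container_name"                        # llama-stack-test-ci-tests
echo "docker-${distro_name}-${inference_mode}.log"  # docker-ci-tests-replay.log
```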

View file

@@ -57,7 +57,7 @@ runs:
        echo "Building Llama Stack"
        LLAMA_STACK_DIR=. \
-         uv run --no-sync llama stack build --template ci-tests --image-type venv
+         uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install
    - name: Configure git for commits
      shell: bash

View file

@@ -14,6 +14,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
 | Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
+| Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |
 | Integration Tests (Record) | [record-integration-tests.yml](record-integration-tests.yml) | Run the integration test suite from tests/integration |
 | Check semantic PR titles | [semantic-pr.yml](semantic-pr.yml) | Ensure that PR titles follow the conventional commit spec |

View file

@@ -30,8 +30,11 @@ jobs:
      - name: Build a single provider
        run: |
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
-           llama stack build --template starter --image-type container --image-name test
+         docker build . \
+           -f containers/Containerfile \
+           --build-arg INSTALL_MODE=editable \
+           --build-arg DISTRO_NAME=starter \
+           --tag llama-stack:starter-ci
      - name: Run installer end-to-end
        run: |

View file

@@ -73,6 +73,24 @@ jobs:
          image_name: kube
          apis: []
          providers: {}
+         storage:
+           backends:
+             kv_default:
+               type: kv_sqlite
+               db_path: $run_dir/kvstore.db
+             sql_default:
+               type: sql_sqlite
+               db_path: $run_dir/sql_store.db
+           stores:
+             metadata:
+               namespace: registry
+               backend: kv_default
+             inference:
+               table_name: inference_store
+               backend: sql_default
+             conversations:
+               table_name: openai_conversations
+               backend: sql_default
          server:
            port: 8321
          EOF

View file

@@ -47,7 +47,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-       client-type: [library, server]
+       client-type: [library, server, docker]
        # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
        python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
        client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -82,7 +82,7 @@ jobs:
        env:
          OPENAI_API_KEY: dummy
        with:
-         stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
+         stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
          setup: ${{ matrix.config.setup }}
          inference-mode: 'replay'
          suite: ${{ matrix.config.suite }}
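The chained `&&`/`||` expression above is how GitHub Actions emulates a switch; a small shell sketch of the mapping it encodes (illustrative only, not part of the workflow):

```bash
# How stack-config resolves for each client-type in the matrix.
for client_type in library server docker; do
  case "$client_type" in
    library) stack_config="ci-tests" ;;
    server)  stack_config="server:ci-tests" ;;
    docker)  stack_config="docker:ci-tests" ;;
  esac
  echo "$client_type -> $stack_config"
done
```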

View file

@@ -144,7 +144,7 @@ jobs:
      - name: Build Llama Stack
        run: |
-         uv run --no-sync llama stack build --template ci-tests --image-type venv
+         uv run --no-sync llama stack list-deps ci-tests | xargs -L1 uv pip install
      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}

View file

@@ -14,6 +14,8 @@ on:
      - '.github/workflows/providers-build.yml'
      - 'llama_stack/distributions/**'
      - 'pyproject.toml'
+     - 'containers/Containerfile'
+     - '.dockerignore'
  pull_request:
    paths:
@@ -24,6 +26,8 @@ on:
      - '.github/workflows/providers-build.yml'
      - 'llama_stack/distributions/**'
      - 'pyproject.toml'
+     - 'containers/Containerfile'
+     - '.dockerignore'
concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -60,15 +64,19 @@ jobs:
      - name: Install dependencies
        uses: ./.github/actions/setup-runner
-     - name: Print build dependencies
+     - name: Install distribution into venv
+       if: matrix.image-type == 'venv'
        run: |
-         uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
+         uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install
-     - name: Run Llama Stack Build
+     - name: Build container image
+       if: matrix.image-type == 'container'
        run: |
-         # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
-         # LLAMA_STACK_DIR is set to the current directory so we are building from the source
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --distro ${{ matrix.distro }} --image-type ${{ matrix.image-type }} --image-name test
+         docker build . \
+           -f containers/Containerfile \
+           --build-arg INSTALL_MODE=editable \
+           --build-arg DISTRO_NAME=${{ matrix.distro }} \
+           --tag llama-stack:${{ matrix.distro }}-ci
      - name: Print dependencies in the image
        if: matrix.image-type == 'venv'
@@ -86,8 +94,8 @@ jobs:
      - name: Build a single provider
        run: |
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama
+         uv pip install -e .
+         uv run --no-sync llama stack list-deps --providers inference=remote::ollama | xargs -L1 uv pip install
  build-custom-container-distribution:
    runs-on: ubuntu-latest
    steps:
@@ -97,11 +105,16 @@ jobs:
      - name: Install dependencies
        uses: ./.github/actions/setup-runner
-     - name: Build a single provider
+     - name: Build container image
        run: |
-         yq -i '.image_type = "container"' llama_stack/distributions/ci-tests/build.yaml
-         yq -i '.image_name = "test"' llama_stack/distributions/ci-tests/build.yaml
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+         BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+         docker build . \
+           -f containers/Containerfile \
+           --build-arg INSTALL_MODE=editable \
+           --build-arg DISTRO_NAME=ci-tests \
+           --build-arg BASE_IMAGE="$BASE_IMAGE" \
+           --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+           -t llama-stack:ci-tests
      - name: Inspect the container image entrypoint
        run: |
@@ -112,7 +125,7 @@ jobs:
        fi
        entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
        echo "Entrypoint: $entrypoint"
-       if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+       if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
          echo "Entrypoint is not correct"
          exit 1
        fi
@@ -129,17 +142,19 @@ jobs:
      - name: Pin distribution to UBI9 base
        run: |
          yq -i '
-           .image_type = "container" |
-           .image_name = "ubi9-test" |
            .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
          ' llama_stack/distributions/ci-tests/build.yaml
-     - name: Build dev container (UBI9)
-       env:
-         USE_COPY_NOT_MOUNT: "true"
-         LLAMA_STACK_DIR: "."
+     - name: Build UBI9 container image
        run: |
-         uv run llama stack build --config llama_stack/distributions/ci-tests/build.yaml
+         BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+         docker build . \
+           -f containers/Containerfile \
+           --build-arg INSTALL_MODE=editable \
+           --build-arg DISTRO_NAME=ci-tests \
+           --build-arg BASE_IMAGE="$BASE_IMAGE" \
+           --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+           -t llama-stack:ci-tests-ubi9
      - name: Inspect UBI9 image
        run: |
@@ -150,7 +165,7 @@ jobs:
        fi
        entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
        echo "Entrypoint: $entrypoint"
-       if [ "$entrypoint" != "[llama stack run /app/run.yaml]" ]; then
+       if [ "$entrypoint" != "[/usr/local/bin/llama-stack-entrypoint.sh]" ]; then
          echo "Entrypoint is not correct"
          exit 1
        fi

View file

@@ -0,0 +1,105 @@
name: Test llama stack list-deps
run-name: Test llama stack list-deps
on:
push:
branches:
- main
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
pull_request:
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'pyproject.toml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
distros: ${{ steps.set-matrix.outputs.distros }}
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Generate Distribution List
id: set-matrix
run: |
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"
list-deps:
needs: generate-matrix
runs-on: ubuntu-latest
strategy:
matrix:
distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
image-type: [venv, container]
fail-fast: false # We want to run all jobs even if some fail
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Print dependencies
run: |
uv run llama stack list-deps ${{ matrix.distro }}
- name: Install Distro using llama stack list-deps
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps ${{ matrix.distro }} | xargs -L1 uv pip install
- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
uv pip list
show-single-provider:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Show a single provider
run: |
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack list-deps --providers inference=remote::ollama
list-deps-from-config:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: list-deps from Config
env:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
run: |
uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
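For reference, the same distribution-discovery pipeline used by the `generate-matrix` job can be run locally; the output is a JSON array of distribution directory names (the names shown in the comment are illustrative):

```bash
# List distribution directories that contain a build.yaml and emit them as a JSON array.
ls llama_stack/distributions/*/*build.yaml \
  | awk -F'/' '{print $(NF-1)}' \
  | jq -R -s -c 'split("\n")[:-1]'
# e.g. ["ci-tests","meta-reference-gpu","starter"]
```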

View file

@@ -46,9 +46,9 @@ jobs:
        yq -i '.image_type = "${{ matrix.image-type }}"' tests/external/ramalama-stack/run.yaml
        cat tests/external/ramalama-stack/run.yaml
-     - name: Build distro from config file
+     - name: Install distribution dependencies
        run: |
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
+         uv run llama stack list-deps tests/external/ramalama-stack/build.yaml | xargs -L1 uv pip install
      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'

View file

@@ -44,11 +44,14 @@ jobs:
      - name: Print distro dependencies
        run: |
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
+         uv run --no-sync llama stack list-deps tests/external/build.yaml
      - name: Build distro from config file
        run: |
-         USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
+         uv venv ci-test
+         source ci-test/bin/activate
+         uv pip install -e .
+         LLAMA_STACK_LOGGING=all=CRITICAL llama stack list-deps tests/external/build.yaml | xargs -L1 uv pip install
      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'

View file

@@ -167,9 +167,9 @@ under the LICENSE file in the root directory of this source tree.
 Some tips about common tasks you work on while contributing to Llama Stack:
-### Using `llama stack build`
+### Installing dependencies of distributions
-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
+When installing dependencies for a distribution, you can use `llama stack list-deps` to view and install the required packages.
 Example:
 ```bash
@@ -177,7 +177,12 @@ cd work/
 git clone https://github.com/llamastack/llama-stack.git
 git clone https://github.com/llamastack/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+
+# Show dependencies for a distribution
+llama stack list-deps <distro-name>
+
+# Install dependencies
+llama stack list-deps <distro-name> | xargs -L1 uv pip install
 ```
 ### Updating distribution configurations

View file

@@ -27,8 +27,11 @@ MODEL="Llama-4-Scout-17B-16E-Instruct"
 # get meta url from llama.com
 huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
+# install dependencies for the distribution
+llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
+
 # start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
+INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
 # install client to interact with the server
 pip install llama-stack-client
@@ -89,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
 To try Llama Stack locally, run:
 ```bash
-curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
+curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
 ```
 ### Overview

View file

@@ -98,21 +98,30 @@ data:
      - provider_id: model-context-protocol
        provider_type: remote::model-context-protocol
        config: {}
-   metadata_store:
-     type: postgres
-     host: ${env.POSTGRES_HOST:=localhost}
-     port: ${env.POSTGRES_PORT:=5432}
-     db: ${env.POSTGRES_DB:=llamastack}
-     user: ${env.POSTGRES_USER:=llamastack}
-     password: ${env.POSTGRES_PASSWORD:=llamastack}
-     table_name: llamastack_kvstore
-   inference_store:
-     type: postgres
-     host: ${env.POSTGRES_HOST:=localhost}
-     port: ${env.POSTGRES_PORT:=5432}
-     db: ${env.POSTGRES_DB:=llamastack}
-     user: ${env.POSTGRES_USER:=llamastack}
-     password: ${env.POSTGRES_PASSWORD:=llamastack}
+   storage:
+     backends:
+       kv_default:
+         type: kv_postgres
+         host: ${env.POSTGRES_HOST:=localhost}
+         port: ${env.POSTGRES_PORT:=5432}
+         db: ${env.POSTGRES_DB:=llamastack}
+         user: ${env.POSTGRES_USER:=llamastack}
+         password: ${env.POSTGRES_PASSWORD:=llamastack}
+         table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+       sql_default:
+         type: sql_postgres
+         host: ${env.POSTGRES_HOST:=localhost}
+         port: ${env.POSTGRES_PORT:=5432}
+         db: ${env.POSTGRES_DB:=llamastack}
+         user: ${env.POSTGRES_USER:=llamastack}
+         password: ${env.POSTGRES_PASSWORD:=llamastack}
+     references:
+       metadata:
+         backend: kv_default
+         namespace: registry
+       inference:
+         backend: sql_default
+         table_name: inference_store
    models:
    - metadata:
        embedding_dimension: 768
@@ -137,5 +146,4 @@ data:
      port: 8323
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config
View file

@@ -95,21 +95,30 @@ providers:
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768
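Since both backends read their connection settings from `${env.POSTGRES_*}` placeholders, a deployment typically exports those variables before starting the stack; a hedged sketch (the values are placeholders for your own Postgres instance):

```bash
# Placeholder values; point these at your actual Postgres instance.
export POSTGRES_HOST=postgres.internal
export POSTGRES_PORT=5432
export POSTGRES_DB=llamastack
export POSTGRES_USER=llamastack
export POSTGRES_PASSWORD=change-me
export POSTGRES_TABLE_NAME=llamastack_kvstore
```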

136
containers/Containerfile Normal file
View file

@@ -0,0 +1,136 @@
# syntax=docker/dockerfile:1.6
#
# This Dockerfile is used to build the Llama Stack container image.
# Example:
# docker build \
# -f containers/Containerfile \
# --build-arg DISTRO_NAME=starter \
# --tag llama-stack:starter .
ARG BASE_IMAGE=python:3.12-slim
FROM ${BASE_IMAGE}
ARG INSTALL_MODE="pypi"
ARG LLAMA_STACK_DIR="/workspace"
ARG LLAMA_STACK_CLIENT_DIR=""
ARG PYPI_VERSION=""
ARG TEST_PYPI_VERSION=""
ARG KEEP_WORKSPACE=""
ARG DISTRO_NAME="starter"
ARG RUN_CONFIG_PATH=""
ARG UV_HTTP_TIMEOUT=500
ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app
RUN set -eux; \
if command -v dnf >/dev/null 2>&1; then \
dnf -y update && \
dnf install -y iputils git net-tools wget \
vim-minimal python3.12 python3.12-pip python3.12-wheel \
python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
ln -sf /usr/bin/pip3.12 /usr/local/bin/pip && \
ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
dnf clean all; \
elif command -v apt-get >/dev/null 2>&1; then \
apt-get update && \
apt-get install -y --no-install-recommends \
iputils-ping net-tools iproute2 dnsutils telnet \
curl wget git procps psmisc lsof traceroute bubblewrap \
gcc g++ && \
rm -rf /var/lib/apt/lists/*; \
else \
echo "Unsupported base image: expected dnf or apt-get" >&2; \
exit 1; \
fi
RUN pip install --no-cache-dir uv
ENV UV_SYSTEM_PYTHON=1
ENV INSTALL_MODE=${INSTALL_MODE}
ENV LLAMA_STACK_DIR=${LLAMA_STACK_DIR}
ENV LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR}
ENV PYPI_VERSION=${PYPI_VERSION}
ENV TEST_PYPI_VERSION=${TEST_PYPI_VERSION}
ENV KEEP_WORKSPACE=${KEEP_WORKSPACE}
ENV DISTRO_NAME=${DISTRO_NAME}
ENV RUN_CONFIG_PATH=${RUN_CONFIG_PATH}
# Copy the repository so editable installs and run configurations are available.
COPY . /workspace
# Install llama-stack
RUN set -eux; \
if [ "$INSTALL_MODE" = "editable" ]; then \
if [ ! -d "$LLAMA_STACK_DIR" ]; then \
echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
exit 1; \
fi; \
uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
uv pip install --no-cache-dir fastapi libcst; \
if [ -n "$TEST_PYPI_VERSION" ]; then \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
else \
uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
fi; \
else \
if [ -n "$PYPI_VERSION" ]; then \
uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
else \
uv pip install --no-cache-dir llama-stack; \
fi; \
fi;
# Install the client package if it is provided
RUN set -eux; \
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
exit 1; \
fi; \
uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
fi;
# Install the dependencies for the distribution
RUN set -eux; \
if [ -z "$DISTRO_NAME" ]; then \
echo "DISTRO_NAME must be provided" >&2; \
exit 1; \
fi; \
deps="$(llama stack list-deps "$DISTRO_NAME")"; \
if [ -n "$deps" ]; then \
printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
fi
# Cleanup
RUN set -eux; \
pip uninstall -y uv; \
should_remove=1; \
if [ -n "$KEEP_WORKSPACE" ]; then should_remove=0; fi; \
if [ "$INSTALL_MODE" = "editable" ]; then should_remove=0; fi; \
case "$RUN_CONFIG_PATH" in \
/workspace*) should_remove=0 ;; \
esac; \
if [ "$should_remove" -eq 1 ] && [ -d /workspace ]; then rm -rf /workspace; fi
RUN cat <<'EOF' >/usr/local/bin/llama-stack-entrypoint.sh
#!/bin/sh
set -e
if [ -n "$RUN_CONFIG_PATH" ] && [ -f "$RUN_CONFIG_PATH" ]; then
exec llama stack run "$RUN_CONFIG_PATH" "$@"
fi
if [ -n "$DISTRO_NAME" ]; then
exec llama stack run "$DISTRO_NAME" "$@"
fi
exec llama stack run "$@"
EOF
RUN chmod +x /usr/local/bin/llama-stack-entrypoint.sh
RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
ENTRYPOINT ["/usr/local/bin/llama-stack-entrypoint.sh"]
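To show how the entrypoint behaves end to end, here is one way this image might be built and started (a sketch; the tag is arbitrary, and with no `RUN_CONFIG_PATH` baked in the entrypoint falls back to `llama stack run $DISTRO_NAME`):

```bash
# Build the starter distribution image from this Containerfile.
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --tag llama-stack:starter

# Start it; extra arguments are passed through to `llama stack run`.
docker run --rm -p 8321:8321 llama-stack:starter --port 8321
```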

View file

@@ -51,8 +51,8 @@ device: cpu
 You can access the HuggingFace trainer via the `starter` distribution:
 ```bash
-llama stack build --distro starter --image-type venv
-llama stack run ~/.llama/distributions/starter/starter-run.yaml
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```
 ### Usage Example

View file

@@ -175,8 +175,7 @@ llama-stack-client benchmarks register \
 **1. Start the Llama Stack API Server**
 ```bash
-# Build and run a distribution (example: together)
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```
@@ -209,7 +208,7 @@ The playground works with any Llama Stack distribution. Popular options include:
 <TabItem value="together" label="Together AI">
 ```bash
-llama stack build --distro together --image-type venv
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```
@@ -222,7 +221,7 @@ llama stack run together
 <TabItem value="ollama" label="Ollama (Local)">
 ```bash
-llama stack build --distro ollama --image-type venv
+llama stack list-deps ollama | xargs -L1 uv pip install
 llama stack run ollama
 ```
@@ -235,7 +234,7 @@ llama stack run ollama
 <TabItem value="meta-reference" label="Meta Reference">
 ```bash
-llama stack build --distro meta-reference --image-type venv
+llama stack list-deps meta-reference | xargs -L1 uv pip install
 llama stack run meta-reference
 ```

View file

@@ -20,7 +20,8 @@ RAG enables your applications to reference and recall information from external
 In one terminal, start the Llama Stack server:
 ```bash
-uv run llama stack build --distro starter --image-type venv --run
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```
 ### 2. Connect with OpenAI Client

View file

@@ -62,6 +62,10 @@ The new `/v2` API must be introduced alongside the existing `/v1` API and run in
 When a `/v2` API is introduced, a clear and generous deprecation policy for the `/v1` API must be published simultaneously. This policy must outline the timeline for the eventual removal of the `/v1` API, giving users ample time to migrate.
+### Deprecated APIs
+Deprecated APIs are those that are no longer actively maintained or supported. Deprecated APIs are marked with the flag `deprecated = True` in the OpenAPI spec. These APIs will be removed in a future release.
 ### API Stability vs. Provider Stability
 The leveling introduced in this document relates to the stability of the API and not specifically the providers within the API.

View file

@@ -158,17 +158,16 @@ under the LICENSE file in the root directory of this source tree.
 Some tips about common tasks you work on while contributing to Llama Stack:
-### Using `llama stack build`
+### Setup for development
-Building a stack image will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.
-Example:
 ```bash
-cd work/
 git clone https://github.com/meta-llama/llama-stack.git
-git clone https://github.com/meta-llama/llama-stack-client-python.git
 cd llama-stack
-LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --distro <...>
+uv run llama stack list-deps <distro-name> | xargs -L1 uv pip install
+
+# (Optional) If you are developing the llama-stack-client-python package, you can add it as an editable package.
+git clone https://github.com/meta-llama/llama-stack-client-python.git
+uv add --editable ../llama-stack-client-python
 ```
 ### Updating distribution configurations

View file

@@ -67,7 +67,7 @@ def get_base_url(self) -> str:
 ## Testing the Provider
-Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
+Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.
 ### 1. Integration Testing
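For instance, testing against the `together` distribution might look like the following; the exact pytest flags are an assumption and may differ in your checkout:

```bash
# Install the distribution's dependencies, then run a slice of the integration suite.
# The --stack-config flag is assumed here; check tests/integration for the current options.
llama stack list-deps together | xargs -L1 uv pip install
uv run pytest -sv tests/integration/inference --stack-config=server:together
```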

View file

@@ -5,225 +5,79 @@ sidebar_label: Build your own Distribution
 sidebar_position: 3
 ---
-This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
+This guide walks you through inspecting existing distributions, customising their configuration, and building runnable artefacts for your own deployment.
+### Explore existing distributions
-### Setting your log level
+All first-party distributions live under `llama_stack/distributions/`. Each directory contains:
-In order to specify the proper logging level users can apply the following environment variable `LLAMA_STACK_LOGGING` with the following format:
+- `build.yaml` the distribution specification (providers, additional dependencies, optional external provider directories).
+- `run.yaml` sample run configuration (when provided).
+- Documentation fragments that power this site.
-`LLAMA_STACK_LOGGING=server=debug;core=info`
+Browse that folder to understand available providers and copy a distribution to use as a starting point. When creating a new stack, duplicate an existing directory, rename it, and adjust the `build.yaml` file to match your requirements.
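For example, duplicating an existing distribution as a starting point might look like this (a sketch; `my-distro` is a placeholder name):

```bash
# Copy an existing distribution and edit its specification.
cp -r llama_stack/distributions/starter llama_stack/distributions/my-distro
"${EDITOR:-vi}" llama_stack/distributions/my-distro/build.yaml
```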
Where each category in the following list:
- all
- core
- server
- router
- inference
- agents
- safety
- eval
- tools
- client
Can be set to any of the following log levels:
- debug
- info
- warning
- error
- critical
The default global log level is `info`. `all` sets the log level for all components.
A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
### Llama Stack Build
In order to build your own distribution, we recommend you clone the `llama-stack` repository.
```
git clone git@github.com:meta-llama/llama-stack.git
cd llama-stack
pip install -e .
```
Use the CLI to build your distribution.
The main points to consider are:
1. **Image Type** - Do you want a venv environment or a Container (eg. Docker)
2. **Template** - Do you want to use a template to build your distribution? or start from scratch ?
3. **Config** - Do you want to use a pre-existing config file to build your distribution?
```
llama stack build -h
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--distro DISTRIBUTION] [--list-distros] [--image-type {container,venv}] [--image-name IMAGE_NAME] [--print-deps-only]
[--run] [--providers PROVIDERS]
Build a Llama stack container
options:
-h, --help show this help message and exit
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to
enter information interactively (default: None)
--template TEMPLATE (deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default:
None)
--distro DISTRIBUTION, --distribution DISTRIBUTION
Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions (default: None)
--list-distros, --list-distributions
Show the available distributions for building a Llama Stack distribution (default: False)
--image-type {container,venv}
Image Type to use for the build. If not specified, will use the image type from the template config. (default: None)
--image-name IMAGE_NAME
[for image-type=container|venv] Name of the virtual environment to use for the build. If not specified, currently active environment will be used if found. (default:
None)
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
--providers PROVIDERS
Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per
API. (default: None)
```
After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 <Tabs>
-<TabItem value="template" label="Building from a template">
+<TabItem value="container" label="Building a container">
-To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
-The following command will allow you to see the available templates and their corresponding providers.
+Use the Containerfile at `containers/Containerfile`, which installs `llama-stack`, resolves distribution dependencies via `llama stack list-deps`, and sets the entrypoint to `llama stack run`.
-```
-llama stack build --list-templates
+```bash
+docker build . \
+  -f containers/Containerfile \
+  --build-arg DISTRO_NAME=starter \
+  --tag llama-stack:starter
 ```
-```
+Handy build arguments:
------------------------------+-----------------------------------------------------------------------------+
| Template Name | Description |
+------------------------------+-----------------------------------------------------------------------------+
| watsonx | Use watsonx for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| vllm-gpu | Use a built-in vLLM engine for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| together | Use Together.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| tgi | Use (an external) TGI server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| starter | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| sambanova | Use SambaNova for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
| remote-vllm | Use (an external) vLLM server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| postgres-demo | Quick start template for running Llama Stack with several popular providers |
+------------------------------+-----------------------------------------------------------------------------+
| passthrough | Use Passthrough hosted llama-stack endpoint for LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| open-benchmark | Distribution for running open benchmarks |
+------------------------------+-----------------------------------------------------------------------------+
| ollama | Use (an external) Ollama server for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| nvidia | Use NVIDIA NIM for running LLM inference, evaluation and safety |
+------------------------------+-----------------------------------------------------------------------------+
| meta-reference-gpu | Use Meta Reference for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| llama_api | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| groq | Use Groq for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| fireworks | Use Fireworks.AI for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| experimental-post-training | Experimental template for post training |
+------------------------------+-----------------------------------------------------------------------------+
| dell | Dell's distribution of Llama Stack. TGI inference via Dell's custom |
| | container |
+------------------------------+-----------------------------------------------------------------------------+
| ci-tests | Distribution for running e2e tests in CI |
+------------------------------+-----------------------------------------------------------------------------+
| cerebras | Use Cerebras for running LLM inference |
+------------------------------+-----------------------------------------------------------------------------+
| bedrock | Use AWS Bedrock for running LLM inference and safety |
+------------------------------+-----------------------------------------------------------------------------+
```
-You may then pick a template to build your distribution with providers fitted to your liking.
+- `DISTRO_NAME`: distribution directory name (defaults to `starter`).
+- `RUN_CONFIG_PATH`: absolute path inside the build context for a run config that should be baked into the image (e.g. `/workspace/run.yaml`).
+- `INSTALL_MODE=editable`: install the repository copied into `/workspace` with `uv pip install -e`. Pair it with `--build-arg LLAMA_STACK_DIR=/workspace`.
+- `LLAMA_STACK_CLIENT_DIR`: optional editable install of the Python client.
+- `PYPI_VERSION` / `TEST_PYPI_VERSION`: pin specific releases when not using editable installs.
+- `KEEP_WORKSPACE=1`: retain `/workspace` in the final image if you need to access additional files (such as sample configs or provider bundles).
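A sketch that combines several of these arguments, building from a repository checkout and baking in a run config (the run.yaml path assumes the starter distribution ships one; adjust it to your own file):

```bash
docker build . \
  -f containers/Containerfile \
  --build-arg DISTRO_NAME=starter \
  --build-arg INSTALL_MODE=editable \
  --build-arg LLAMA_STACK_DIR=/workspace \
  --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/starter/run.yaml \
  --tag llama-stack:starter-dev
```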
-For example, to build a distribution with TGI as the inference provider, you can run:
+Make sure any custom `build.yaml`, run configs, or provider directories you reference are included in the Docker build context so the Containerfile can read them.
```
$ llama stack build --distro starter
...
You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml`
```
```{tip}
The generated `run.yaml` file is a starting point for your configuration. For comprehensive guidance on customizing it for your specific needs, infrastructure, and deployment scenarios, see [Customizing Your run.yaml Configuration](customizing_run_yaml.md).
```
 </TabItem>
-<TabItem value="scratch" label="Building from Scratch">
+<TabItem value="external" label="Building with external providers">
-If the provided templates do not fit your use case, you could start off with running `llama stack build` which will allow you to a interactively enter wizard where you will be prompted to enter build configurations.
+External providers live outside the main repository but can be bundled by pointing `external_providers_dir` to a directory that contains your provider packages.
-It would be best to start with a template and understand the structure of the config file and the various concepts ( APIS, providers, resources, etc.) before starting from scratch.
+1. Copy providers into the build context, for example `cp -R path/to/providers providers.d`.
-```
+2. Update `build.yaml` with the directory and provider entries.
-llama stack build
+3. Adjust run configs to use the in-container path (usually `/.llama/providers.d`). Pass `--build-arg RUN_CONFIG_PATH=/workspace/run.yaml` if you want to bake the config.
-> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
+Example `build.yaml` excerpt for a custom Ollama provider:
> Enter the image type you want your Llama Stack to be built as (container or venv): venv
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
Tip: use <TAB> to see options for the providers.
> Enter provider for API inference: inline::meta-reference
> Enter provider for API safety: inline::llama-guard
> Enter provider for API agents: inline::meta-reference
> Enter provider for API memory: inline::faiss
> Enter provider for API datasetio: inline::meta-reference
> Enter provider for API scoring: inline::meta-reference
> Enter provider for API eval: inline::meta-reference
> Enter provider for API telemetry: inline::meta-reference
> (Optional) Enter a short description for your Llama Stack:
You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
```
</TabItem>
<TabItem value="config" label="Building from a pre-existing build config file">
- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command.
- The config file will be of contents like the ones in `llama_stack/distributions/*build.yaml`.
```
llama stack build --config llama_stack/distributions/starter/build.yaml
```
</TabItem>
<TabItem value="external" label="Building with External Providers">
Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
To build a distribution with external providers, you need to:
1. Configure the `external_providers_dir` in your build configuration file:
 ```yaml
-# Example my-external-stack.yaml with external providers
-version: '2'
 distribution_spec:
-  description: Custom distro for CI tests
   providers:
     inference:
     - remote::custom_ollama
-    # Add more providers as needed
+external_providers_dir: /workspace/providers.d
-  image_type: container
+```
-  image_name: ci-test
-# Path to external provider implementations
+Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:
-external_providers_dir: ~/.llama/providers.d
```python
from llama_stack.providers.datatypes import ProviderSpec
def get_provider_spec() -> ProviderSpec:
return ProviderSpec(
provider_type="remote::custom_ollama",
module="llama_stack_ollama_provider",
config_class="llama_stack_ollama_provider.config.OllamaImplConfig",
pip_packages=[
"ollama",
"aiohttp",
"llama-stack-provider-ollama",
],
)
 ```
 Here's an example for a custom Ollama provider:
@@ -232,9 +86,9 @@ Here's an example for a custom Ollama provider:
 adapter:
   adapter_type: custom_ollama
   pip_packages:
   - ollama
   - aiohttp
   - llama-stack-provider-ollama # This is the provider package
   config_class: llama_stack_ollama_provider.config.OllamaImplConfig
   module: llama_stack_ollama_provider
   api_dependencies: []
@@ -245,53 +99,22 @@ The `pip_packages` section lists the Python packages required by the provider, a
 provider package itself. The package must be available on PyPI or can be provided from a local
 directory or a git repository (git must be installed on the build environment).
-2. Build your distribution using the config file:
+For deeper guidance, see the [External Providers documentation](../providers/external/).
-```
-llama stack build --config my-external-stack.yaml
-```
-For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external/).
 </TabItem>
-<TabItem value="container" label="Building Container">
+</Tabs>
-:::tip Podman Alternative
+### Run your stack server
-Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
-:::
-To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
+After building the image, launch it directly with Docker or Podman—the entrypoint calls `llama stack run` using the baked distribution or the bundled run config:
```
llama stack build --distro starter --image-type container
```
```
$ llama stack build --distro starter --image-type container
...
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim
...
```
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
```
Now set some environment variables for the inference model ID and Llama Stack Port and create a local directory to mount into the container's file system.
 ```bash
-export INFERENCE_MODEL="llama3.2:3b"
-export LLAMA_STACK_PORT=8321
-mkdir -p ~/.llama
-```
-After this step is successful, you should be able to find the built container image and test it with the below Docker command:
-```
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -e INFERENCE_MODEL=$INFERENCE_MODEL \
   -e OLLAMA_URL=http://host.docker.internal:11434 \
-  localhost/distribution-ollama:dev \
+  llama-stack:starter \
   --port $LLAMA_STACK_PORT
 ```
@@ -311,131 +134,14 @@ Here are the docker flags and their uses:
 * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
-</TabItem>
-</Tabs>
-### Running your Stack server
+If you prepared a custom run config, mount it into the container and reference it explicitly:
-Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack build` step.
+```bash
+docker run \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $(pwd)/run.yaml:/app/run.yaml \
+  llama-stack:starter \
+  /app/run.yaml
 ```
llama stack run -h
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
[--image-type {venv}] [--enable-ui]
[config | distro]
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
positional arguments:
config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)
options:
-h, --help show this help message and exit
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
--image-name IMAGE_NAME
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--image-type {venv}
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
--enable-ui Start the UI server (default: False)
```
**Note:** Container images built with `llama stack build --image-type container` cannot be run using `llama stack run`. Instead, they must be run directly using Docker or Podman commands as shown in the container building section above.
```
# Start using template name
llama stack run tgi
# Start using config file
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
```
```
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
Serving API inspect
GET /health
GET /providers/list
GET /routes/list
Serving API inference
POST /inference/chat_completion
POST /inference/completion
POST /inference/embeddings
...
Serving API agents
POST /agents/create
POST /agents/session/create
POST /agents/turn/create
POST /agents/delete
POST /agents/session/delete
POST /agents/session/get
POST /agents/step/get
POST /agents/turn/get
Listening on ['::', '0.0.0.0']:8321
INFO: Started server process [2935911]
INFO: Waiting for application startup.
INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```
### Listing Distributions
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
```
llama stack list -h
usage: llama stack list [-h]
list the build stacks
options:
-h, --help show this help message and exit
```
Example Usage
```
llama stack list
```
```
------------------------------+-----------------------------------------------------------------+--------------+------------+
| Stack Name | Path | Build Config | Run Config |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| together | ~/.llama/distributions/together | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| bedrock | ~/.llama/distributions/bedrock | Yes | No |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| starter | ~/.llama/distributions/starter | Yes | Yes |
+------------------------------+-----------------------------------------------------------------------------+--------------+
| remote-vllm | ~/.llama/distributions/remote-vllm | Yes | Yes |
+------------------------------+-----------------------------------------------------------------------------+--------------+
```
### Removing a Distribution
Use the remove command to delete a distribution you've previously built.
```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]
Remove the build stack
positional arguments:
name Name of the stack to delete (default: None)
options:
-h, --help show this help message and exit
--all, -a Delete all stacks (use with caution) (default: False)
```
Example
```
llama stack rm llamastack-test
```
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.
### Troubleshooting
If you encounter any issues, ask questions in our discord or search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file an new issue.

View file

@@ -44,18 +44,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-     persistence_store:
-       type: sqlite
-       namespace: null
-       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
+     persistence:
+       agent_state:
+         backend: kv_default
+         namespace: agents
+       responses:
+         backend: sql_default
+         table_name: responses
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

View file

@@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.
 ```bash
 # setup
 uv pip install llama-stack
-llama stack build --distro starter --image-type venv
+llama stack list-deps starter | xargs -L1 uv pip install
 ```
 ```python

View file

@@ -1,56 +1,155 @@
 apiVersion: v1
 data:
-  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
-    inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
-    \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
-    \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
-    ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
-    ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
-    remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
-    \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
-    \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
-    \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
-    \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
-    \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
-    \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
-    \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
-    \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
-    meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
-    ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
-    \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
-    \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
-    \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
-    \ provider_type: inline::meta-reference\n config:\n persistence_store:\n
-    \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
-    ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
-    \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
-    ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
-    \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
-    \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
-    \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
-    3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
-    {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
-    \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
+  stack_run_config.yaml: |
+    version: '2'
+    image_name: kubernetes-demo
+    apis:
+    - agents
+    - inference
+    - files
+    - safety
+    - telemetry
+    - tool_runtime
+    - vector_io
+    providers:
+      inference:
+      - provider_id: vllm-inference
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: vllm-safety
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: sentence-transformers
+        provider_type: inline::sentence-transformers
+        config: {}
+      vector_io:
+      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+        provider_type: remote::chromadb
+        config:
+          url: ${env.CHROMADB_URL:=}
+          kvstore:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
\ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: port: ${env.POSTGRES_PORT:=5432}
${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n db: ${env.POSTGRES_DB:=llamastack}
\ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- user: ${env.POSTGRES_USER:=llamastack}
metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: password: ${env.POSTGRES_PASSWORD:=llamastack}
sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n files:
\ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: - provider_id: meta-reference-files
${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n provider_type: inline::localfs
\ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: config:
[]\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n metadata_store:
\ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n type: sqlite
\ type: github_token\n" db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:+}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:+}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
storage:
backends:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
references:
metadata:
backend: kv_default
namespace: registry
inference:
backend: sql_default
table_name: inference_store
models:
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
provider_id: vllm-safety
model_type: llm
shields:
- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
auth:
provider_config:
type: github_token
kind: ConfigMap kind: ConfigMap
metadata: metadata:
creationTimestamp: null
name: llama-stack-config name: llama-stack-config

View file

@ -93,21 +93,30 @@ providers:
- provider_id: model-context-protocol - provider_id: model-context-protocol
provider_type: remote::model-context-protocol provider_type: remote::model-context-protocol
config: {} config: {}
metadata_store: storage:
type: postgres backends:
host: ${env.POSTGRES_HOST:=localhost} kv_default:
port: ${env.POSTGRES_PORT:=5432} type: kv_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
table_name: llamastack_kvstore user: ${env.POSTGRES_USER:=llamastack}
inference_store: password: ${env.POSTGRES_PASSWORD:=llamastack}
type: postgres table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
host: ${env.POSTGRES_HOST:=localhost} sql_default:
port: ${env.POSTGRES_PORT:=5432} type: sql_postgres
db: ${env.POSTGRES_DB:=llamastack} host: ${env.POSTGRES_HOST:=localhost}
user: ${env.POSTGRES_USER:=llamastack} port: ${env.POSTGRES_PORT:=5432}
password: ${env.POSTGRES_PASSWORD:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
references:
metadata:
backend: kv_default
namespace: registry
inference:
backend: sql_default
table_name: inference_store
models: models:
- metadata: - metadata:
embedding_dimension: 768 embedding_dimension: 768

View file

@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
uv venv starter --python 3.12 uv venv starter --python 3.12
source starter/bin/activate # On Windows: starter\Scripts\activate source starter/bin/activate # On Windows: starter\Scripts\activate
pip install --no-cache llama-stack==0.2.2 pip install --no-cache llama-stack==0.2.2
llama stack build --distro starter --image-type venv llama stack list-deps starter | xargs -L1 uv pip install
export FIREWORKS_API_KEY=<SOME_KEY> export FIREWORKS_API_KEY=<SOME_KEY>
llama stack run starter --port 5050 llama stack run starter --port 5050
``` ```

View file

@ -166,10 +166,10 @@ docker run \
### Via venv ### Via venv
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. Install the distribution dependencies before launching:
```bash ```bash
llama stack build --distro dell --image-type venv llama stack list-deps dell | xargs -L1 uv pip install
INFERENCE_MODEL=$INFERENCE_MODEL \ INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \ DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \ CHROMA_URL=$CHROMA_URL \

View file

@ -81,10 +81,10 @@ docker run \
### Via venv ### Via venv
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. Make sure you have the Llama Stack CLI available.
```bash ```bash
llama stack build --distro meta-reference-gpu --image-type venv llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/meta-reference-gpu/run.yaml \ llama stack run distributions/meta-reference-gpu/run.yaml \
--port 8321 --port 8321

View file

@ -136,11 +136,11 @@ docker run \
### Via venv ### Via venv
If you've set up your local development environment, you can also build the image using your local virtual environment. If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
```bash ```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \ NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \ INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \ llama stack run ./run.yaml \

View file

@ -169,7 +169,11 @@ docker run \
Ensure you have configured the starter distribution using the environment variables explained above. Ensure you have configured the starter distribution using the environment variables explained above.
```bash ```bash
uv run --with llama-stack llama stack build --distro starter --image-type venv --run # Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run the server
uv run --with llama-stack llama stack run starter
``` ```
## Example Usage ## Example Usage

View file

@ -23,6 +23,17 @@ Another simple way to start interacting with Llama Stack is to just spin up a co
If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details. If you have built a container image and want to deploy it in a Kubernetes cluster instead of starting the Llama Stack server locally, see the [Kubernetes Deployment Guide](../deploying/kubernetes_deployment) for more details.
## Configure logging
Control log output via environment variables before starting the server.
- `LLAMA_STACK_LOGGING` sets per-component levels, e.g. `LLAMA_STACK_LOGGING=server=debug;core=info`.
- Supported categories: `all`, `core`, `server`, `router`, `inference`, `agents`, `safety`, `eval`, `tools`, `client`.
- Levels: `debug`, `info`, `warning`, `error`, `critical` (default is `info`). Use `all=<level>` to apply globally.
- `LLAMA_STACK_LOG_FILE=/path/to/log` mirrors logs to a file while still printing to stdout.
Export these variables prior to running `llama stack run`, launching a container, or starting the server through any other pathway.
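For example, a minimal sketch that turns on debug logging for the server component and mirrors output to a file (the log path and the `starter` distribution are illustrative):
```bash
# Verbose server logs, default level elsewhere, mirrored to a file
export LLAMA_STACK_LOGGING="server=debug;core=info"
export LLAMA_STACK_LOG_FILE=~/llama-stack-server.log

llama stack run starter
```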
```{toctree} ```{toctree}
:maxdepth: 1 :maxdepth: 1
:hidden: :hidden:

View file

@ -58,15 +58,19 @@ Llama Stack is a server that exposes multiple APIs, you connect with it using th
<Tabs> <Tabs>
<TabItem value="venv" label="Using venv"> <TabItem value="venv" label="Using venv">
You can use Python to build and run the Llama Stack server, which is useful for testing and development. You can use Python to install dependencies and run the Llama Stack server, which is useful for testing and development.
Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup, Llama Stack uses a [YAML configuration file](../distributions/configuration) to specify the stack setup,
which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml). which defines the providers and their settings. The generated configuration serves as a starting point that you can [customize for your specific needs](../distributions/customizing_run_yaml).
Now let's build and run the Llama Stack config for Ollama. Now let's install dependencies and run the Llama Stack config for Ollama.
We use `starter` as the template. By default all providers are disabled, so you need to enable Ollama by passing environment variables.
```bash ```bash
llama stack build --distro starter --image-type venv --run # Install dependencies for the starter distribution
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run the server
llama stack run starter
``` ```
</TabItem> </TabItem>
<TabItem value="container" label="Using a Container"> <TabItem value="container" label="Using a Container">
@ -304,7 +308,7 @@ stream = agent.create_turn(
for event in AgentEventLogger().log(stream): for event in AgentEventLogger().log(stream):
event.print() event.print()
``` ```
### ii. Run the Script #### ii. Run the Script
Let's run the script using `uv` Let's run the script using `uv`
```bash ```bash
uv run python agent.py uv run python agent.py

View file

@ -24,10 +24,13 @@ ollama run llama3.2:3b --keepalive 60m
#### Step 2: Run the Llama Stack server #### Step 2: Run the Llama Stack server
We will use `uv` to run the Llama Stack server. We will use `uv` to install dependencies and run the Llama Stack server.
```bash ```bash
OLLAMA_URL=http://localhost:11434 \ # Install dependencies for the starter distribution
uv run --with llama-stack llama stack build --distro starter --image-type venv --run uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run the server
OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
``` ```
#### Step 3: Run the demo #### Step 3: Run the demo
Now open up a new terminal and copy the following script into a file named `demo_script.py`. Now open up a new terminal and copy the following script into a file named `demo_script.py`.

View file

@ -14,16 +14,18 @@ Meta's reference implementation of an agent system that can use tools, access ve
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No | | |
| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
persistence_store: persistence:
type: sqlite agent_state:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db namespace: agents
responses_store: backend: kv_default
type: sqlite responses:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
``` ```

View file

@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Configuration for the key-value store backend. | | `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Configuration for the key-value store backend. |
| `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. | | `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
| `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. | | `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |
@ -22,6 +22,6 @@ Reference implementation of batches API with KVStore persistence.
```yaml ```yaml
kvstore: kvstore:
type: sqlite namespace: batches
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db backend: kv_default
``` ```

View file

@ -14,12 +14,12 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
kvstore: kvstore:
type: sqlite namespace: datasetio::localfs
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db backend: kv_default
``` ```

View file

@ -14,12 +14,12 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
kvstore: kvstore:
type: sqlite namespace: datasetio::huggingface
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db backend: kv_default
``` ```

View file

@ -1,5 +1,7 @@
--- ---
description: "Llama Stack Evaluation API for running evaluations on model and agent candidates." description: "Evaluations
Llama Stack Evaluation API for running evaluations on model and agent candidates."
sidebar_label: Eval sidebar_label: Eval
title: Eval title: Eval
--- ---
@ -8,6 +10,8 @@ title: Eval
## Overview ## Overview
Llama Stack Evaluation API for running evaluations on model and agent candidates. Evaluations
Llama Stack Evaluation API for running evaluations on model and agent candidates.
This section contains documentation for all available providers for the **eval** API. This section contains documentation for all available providers for the **eval** API.

View file

@ -14,12 +14,12 @@ Meta's reference implementation of evaluation tasks with support for multiple la
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
kvstore: kvstore:
type: sqlite namespace: eval
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db backend: kv_default
``` ```

View file

@ -240,6 +240,6 @@ additional_pip_packages:
- sqlalchemy[asyncio] - sqlalchemy[asyncio]
``` ```
No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc. No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.
The provider will now be available in Llama Stack with the type `remote::ramalama`. The provider will now be available in Llama Stack with the type `remote::ramalama`.
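As a rough sketch, assuming a distribution config named `mydistro` that includes the `remote::ramalama` provider (the distribution name is hypothetical):
```bash
# Install the distribution's dependencies, then start the server
llama stack list-deps mydistro | xargs -L1 uv pip install
llama stack run mydistro
```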

View file

@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files | | `storage_dir` | `<class 'str'>` | No | | Directory to store uploaded files |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | | `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
| `ttl_secs` | `<class 'int'>` | No | 31536000 | | | `ttl_secs` | `<class 'int'>` | No | 31536000 | |
## Sample Configuration ## Sample Configuration
@ -23,6 +23,6 @@ Local filesystem-based file storage provider for managing files and documents lo
```yaml ```yaml
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files}
metadata_store: metadata_store:
type: sqlite table_name: files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db backend: sql_default
``` ```

View file

@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad
| `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) |
| `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
| `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist | | `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | | `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
## Sample Configuration ## Sample Configuration
@ -32,6 +32,6 @@ aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY:=}
endpoint_url: ${env.S3_ENDPOINT_URL:=} endpoint_url: ${env.S3_ENDPOINT_URL:=}
auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false} auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false}
metadata_store: metadata_store:
type: sqlite table_name: s3_files_metadata
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db backend: sql_default
``` ```

View file

@ -79,13 +79,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | | | `db_path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
db_path: ${env.CHROMADB_PATH} db_path: ${env.CHROMADB_PATH}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db backend: kv_default
``` ```

View file

@ -95,12 +95,12 @@ more details about Faiss in general.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db backend: kv_default
``` ```

View file

@ -14,14 +14,14 @@ Meta's reference implementation of a vector database.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
kvstore: persistence:
type: sqlite namespace: vector_io::faiss
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db backend: kv_default
``` ```
## Deprecation Notice ## Deprecation Notice

View file

@ -17,14 +17,14 @@ Please refer to the remote provider documentation.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | | | `db_path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server | | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
kvstore: persistence:
type: sqlite namespace: vector_io::milvus
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db backend: kv_default
``` ```

View file

@ -98,13 +98,13 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `path` | `<class 'str'>` | No | | | | `path` | `<class 'str'>` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db
kvstore: persistence:
type: sqlite namespace: vector_io::qdrant
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db backend: kv_default
``` ```

View file

@ -408,13 +408,13 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file | | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db backend: kv_default
``` ```

View file

@ -17,15 +17,15 @@ Please refer to the sqlite-vec provider documentation.
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `db_path` | `<class 'str'>` | No | | Path to the SQLite database file | | `db_path` | `<class 'str'>` | No | | Path to the SQLite database file |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend (SQLite only for now) | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db
kvstore: persistence:
type: sqlite namespace: vector_io::sqlite_vec
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db backend: kv_default
``` ```
## Deprecation Notice ## Deprecation Notice

View file

@ -78,13 +78,13 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
| Field | Type | Required | Default | Description | | Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `url` | `str \| None` | No | | | | `url` | `str \| None` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
url: ${env.CHROMADB_URL} url: ${env.CHROMADB_URL}
kvstore: persistence:
type: sqlite namespace: vector_io::chroma_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db backend: kv_default
``` ```

View file

@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
| `uri` | `<class 'str'>` | No | | The URI of the Milvus server | | `uri` | `<class 'str'>` | No | | The URI of the Milvus server |
| `token` | `str \| None` | No | | The token of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server |
| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server | | `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | Config for KV store backend |
| `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
:::note :::note
@ -420,7 +420,7 @@ This configuration class accepts additional fields beyond those listed above. Yo
```yaml ```yaml
uri: ${env.MILVUS_ENDPOINT} uri: ${env.MILVUS_ENDPOINT}
token: ${env.MILVUS_TOKEN} token: ${env.MILVUS_TOKEN}
kvstore: persistence:
type: sqlite namespace: vector_io::milvus_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db backend: kv_default
``` ```

View file

@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
| `db` | `str \| None` | No | postgres | | | `db` | `str \| None` | No | postgres | |
| `user` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | |
| `password` | `str \| None` | No | mysecretpassword | | | `password` | `str \| None` | No | mysecretpassword | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
@ -228,7 +228,7 @@ port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB} db: ${env.PGVECTOR_DB}
user: ${env.PGVECTOR_USER} user: ${env.PGVECTOR_USER}
password: ${env.PGVECTOR_PASSWORD} password: ${env.PGVECTOR_PASSWORD}
kvstore: persistence:
type: sqlite namespace: vector_io::pgvector
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db backend: kv_default
``` ```

View file

@ -26,13 +26,13 @@ Please refer to the inline provider documentation.
| `prefix` | `str \| None` | No | | | | `prefix` | `str \| None` | No | | |
| `timeout` | `int \| None` | No | | | | `timeout` | `int \| None` | No | | |
| `host` | `str \| None` | No | | | | `host` | `str \| None` | No | | |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | | `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No | | |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
api_key: ${env.QDRANT_API_KEY:=} api_key: ${env.QDRANT_API_KEY:=}
kvstore: persistence:
type: sqlite namespace: vector_io::qdrant_remote
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db backend: kv_default
``` ```

View file

@ -75,14 +75,14 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
|-------|------|----------|---------|-------------| |-------|------|----------|---------|-------------|
| `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance |
| `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig, annotation=NoneType, required=False, default='sqlite', discriminator='type'` | No | | Config for KV store backend (SQLite only for now) | | `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) |
## Sample Configuration ## Sample Configuration
```yaml ```yaml
weaviate_api_key: null weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
kvstore: persistence:
type: sqlite namespace: vector_io::weaviate
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db backend: kv_default
``` ```

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -2864,7 +2864,7 @@
} }
], ],
"source": [ "source": [
"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__" "!llama stack list-deps experimental-post-training | xargs -L1 uv pip install"
] ]
}, },
{ {

View file

@ -38,7 +38,7 @@
"source": [ "source": [
"# NBVAL_SKIP\n", "# NBVAL_SKIP\n",
"!pip install -U llama-stack\n", "!pip install -U llama-stack\n",
"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv" "llama stack list-deps fireworks | xargs -L1 uv pip install\n"
] ]
}, },
{ {

File diff suppressed because it is too large

View file

@ -136,7 +136,8 @@
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n", " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n", " process = subprocess.Popen(\n",
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n", " \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
" \"uv run --with llama-stack llama stack run starter\",\n",
" shell=True,\n", " shell=True,\n",
" stdout=log_file,\n", " stdout=log_file,\n",
" stderr=log_file,\n", " stderr=log_file,\n",
@ -172,7 +173,7 @@
"\n", "\n",
"def kill_llama_stack_server():\n", "def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes using pkill command\n", " # Kill any existing llama stack server processes using pkill command\n",
" os.system(\"pkill -f llama_stack.core.server.server\")" " os.system(\"pkill -f llama_stack.core.server.server\")\n"
] ]
}, },
{ {

View file

@ -105,7 +105,8 @@
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n", " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n", " process = subprocess.Popen(\n",
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n", " \"uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\",\n",
" \"uv run --with llama-stack llama stack run starter\",\n",
" shell=True,\n", " shell=True,\n",
" stdout=log_file,\n", " stdout=log_file,\n",
" stderr=log_file,\n", " stderr=log_file,\n",

View file

@ -92,7 +92,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"```bash\n", "```bash\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n", "uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
"```" "```"
] ]
}, },

View file

@ -81,7 +81,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"```bash\n", "```bash\n",
"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n", "uv run --with llama-stack llama stack list-deps nvidia | xargs -L1 uv pip install\n",
"```" "```"
] ]
}, },

View file

@ -1,366 +1,366 @@
{ {
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "c1e7571c", "id": "c1e7571c",
"metadata": { "metadata": {
"id": "c1e7571c" "id": "c1e7571c"
}, },
"source": [ "source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
"\n", "\n",
"# Llama Stack - Building AI Applications\n", "# Llama Stack - Building AI Applications\n",
"\n", "\n",
"<img src=\"https://llamastack.github.io/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n", "<img src=\"https://llamastack.github.io/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
"\n", "\n",
"Get started with Llama Stack in minutes!\n", "Get started with Llama Stack in minutes!\n",
"\n", "\n",
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n", "[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
"\n", "\n",
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n", "In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n" "as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
] ]
},
{
"cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and test inference with Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Collab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"Well now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be ran in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --distro starter\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
"\n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
"\n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
"\n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
"\n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server\n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first ollama and first ollama's embedding model\n",
"model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
"embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
"embedding_model_id = embedding_model.identifier\n",
"embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
}, },
"nbformat": 4, {
"nbformat_minor": 5 "cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and test inference with Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Collab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"Well now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be ran in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
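As an optional extra check, you can confirm from Python that the Ollama server is reachable before wiring it into Llama Stack. This is a minimal sketch that assumes Ollama is listening on its default port 11434; the exact response text can vary by Ollama version.

```python
import requests

# Ollama's default local endpoint; adjust the port if you changed it.
resp = requests.get("http://localhost:11434")
print(resp.status_code, resp.text)  # typically 200 and a short "Ollama is running" banner
```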
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
"\n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
"\n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
"\n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
"\n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server\n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
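If the readiness assertion fails, the first place to look is the log file written by `run_llama_stack_server_background()`. A small sketch for printing its tail from the notebook (the `llama_stack_server.log` name comes from the helper defined above):

```python
from pathlib import Path

log_path = Path("llama_stack_server.log")
if log_path.exists():
    # Show the last ~40 lines of the server log to help diagnose startup failures.
    print("\n".join(log_path.read_text().splitlines()[-40:]))
else:
    print("No server log written yet")
```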
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first ollama and first ollama's embedding model\n",
"model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
"embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
"embedding_model_id = embedding_model.identifier\n",
"embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
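If you prefer a single blocking call instead of streaming, the same turn can be issued with `stream=False`. The sketch below assumes the returned turn exposes the final message as `output_message.content`; attribute names may differ across client versions.

```python
# Non-streaming variant of the same request (a sketch; adjust attribute access to your client version).
turn = agent.create_turn(
    messages=[{"role": "user", "content": prompt}],
    session_id=agent.create_session("rag_session_non_streaming"),
    stream=False,
)
print(turn.output_message.content)
```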
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
} }
View file
@ -47,11 +47,11 @@ function QuickStart() {
<pre><code>{`# Install uv and start Ollama <pre><code>{`# Install uv and start Ollama
ollama run llama3.2:3b --keepalive 60m ollama run llama3.2:3b --keepalive 60m
# Install server dependencies
uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install
# Run Llama Stack server # Run Llama Stack server
OLLAMA_URL=http://localhost:11434 \\ OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
uv run --with llama-stack \\
llama stack build --distro starter \\
--image-type venv --run
# Try the Python SDK # Try the Python SDK
from llama_stack_client import LlamaStackClient from llama_stack_client import LlamaStackClient
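For orientation, a minimal sketch of where that SDK snippet usually goes next, assuming the server started above is listening on the default port 8321 (the model identifiers depend on what Ollama is serving locally):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# List the models registered with the stack and pick the first LLM.
models = client.models.list()
llm = next(m for m in models if m.model_type == "llm")
print("Using model:", llm.identifier)
```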
View file
@ -9024,6 +9024,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9901,6 +9905,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -13449,8 +13457,8 @@
}, },
{ {
"name": "Eval", "name": "Eval",
"description": "", "description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." "x-displayName": "Evaluations"
}, },
{ {
"name": "Files", "name": "Files",
View file
@ -6734,6 +6734,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7403,6 +7407,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at
@ -10196,9 +10204,9 @@ tags:
- name: Datasets - name: Datasets
description: '' description: ''
- name: Eval - name: Eval
description: '' description: >-
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates. Llama Stack Evaluation API for running evaluations on model and agent candidates.
x-displayName: Evaluations
- name: Files - name: Files
description: >- description: >-
This API is used to upload documents that can be used with other Llama Stack This API is used to upload documents that can be used with other Llama Stack
View file
@ -5518,8 +5518,8 @@
}, },
{ {
"name": "Eval", "name": "Eval",
"description": "", "description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." "x-displayName": "Evaluations"
}, },
{ {
"name": "PostTraining (Coming Soon)", "name": "PostTraining (Coming Soon)",
View file
@ -4119,9 +4119,9 @@ tags:
- name: Datasets - name: Datasets
description: '' description: ''
- name: Eval - name: Eval
description: '' description: >-
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates. Llama Stack Evaluation API for running evaluations on model and agent candidates.
x-displayName: Evaluations
- name: PostTraining (Coming Soon) - name: PostTraining (Coming Soon)
description: '' description: ''
x-tagGroups: x-tagGroups:
View file
@ -282,7 +282,7 @@
"Conversations" "Conversations"
], ],
"summary": "Create a conversation.", "summary": "Create a conversation.",
"description": "Create a conversation.", "description": "Create a conversation.\nCreate a conversation.",
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
@ -326,8 +326,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Get a conversation with the given ID.", "summary": "Retrieve a conversation.",
"description": "Get a conversation with the given ID.", "description": "Retrieve a conversation.\nGet a conversation with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -369,8 +369,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Update a conversation's metadata with the given ID.", "summary": "Update a conversation.",
"description": "Update a conversation's metadata with the given ID.", "description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -422,8 +422,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Delete a conversation with the given ID.", "summary": "Delete a conversation.",
"description": "Delete a conversation with the given ID.", "description": "Delete a conversation.\nDelete a conversation with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -467,8 +467,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "List items in the conversation.", "summary": "List items.",
"description": "List items in the conversation.", "description": "List items.\nList items in the conversation.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -597,8 +597,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Create items in the conversation.", "summary": "Create items.",
"description": "Create items in the conversation.", "description": "Create items.\nCreate items in the conversation.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -652,8 +652,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Retrieve a conversation item.", "summary": "Retrieve an item.",
"description": "Retrieve a conversation item.", "description": "Retrieve an item.\nRetrieve a conversation item.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -704,8 +704,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Delete a conversation item.", "summary": "Delete an item.",
"description": "Delete a conversation item.", "description": "Delete an item.\nDelete a conversation item.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -7600,6 +7600,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -8148,6 +8152,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -13251,8 +13259,8 @@
}, },
{ {
"name": "Conversations", "name": "Conversations",
"description": "", "description": "Protocol for conversation management operations.",
"x-displayName": "Protocol for conversation management operations." "x-displayName": "Conversations"
}, },
{ {
"name": "Files", "name": "Files",
View file
@ -192,7 +192,10 @@ paths:
tags: tags:
- Conversations - Conversations
summary: Create a conversation. summary: Create a conversation.
description: Create a conversation. description: >-
Create a conversation.
Create a conversation.
parameters: [] parameters: []
requestBody: requestBody:
content: content:
@ -222,8 +225,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Get a conversation with the given ID. summary: Retrieve a conversation.
description: Get a conversation with the given ID. description: >-
Retrieve a conversation.
Get a conversation with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -252,9 +258,10 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: >- summary: Update a conversation.
Update a conversation's metadata with the given ID.
description: >- description: >-
Update a conversation.
Update a conversation's metadata with the given ID. Update a conversation's metadata with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
@ -290,8 +297,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Delete a conversation with the given ID. summary: Delete a conversation.
description: Delete a conversation with the given ID. description: >-
Delete a conversation.
Delete a conversation with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -321,8 +331,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: List items in the conversation. summary: List items.
description: List items in the conversation. description: >-
List items.
List items in the conversation.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -495,8 +508,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Create items in the conversation. summary: Create items.
description: Create items in the conversation. description: >-
Create items.
Create items in the conversation.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -532,8 +548,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Retrieve a conversation item. summary: Retrieve an item.
description: Retrieve a conversation item. description: >-
Retrieve an item.
Retrieve a conversation item.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -568,8 +587,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Delete a conversation item. summary: Delete an item.
description: Delete a conversation item. description: >-
Delete an item.
Delete a conversation item.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -5793,6 +5815,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -6196,6 +6222,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at
@ -10146,9 +10176,9 @@ tags:
- `background` - `background`
x-displayName: Agents x-displayName: Agents
- name: Conversations - name: Conversations
description: '' description: >-
x-displayName: >-
Protocol for conversation management operations. Protocol for conversation management operations.
x-displayName: Conversations
- name: Files - name: Files
description: >- description: >-
This API is used to upload documents that can be used with other Llama Stack This API is used to upload documents that can be used with other Llama Stack
View file
@ -282,7 +282,7 @@
"Conversations" "Conversations"
], ],
"summary": "Create a conversation.", "summary": "Create a conversation.",
"description": "Create a conversation.", "description": "Create a conversation.\nCreate a conversation.",
"parameters": [], "parameters": [],
"requestBody": { "requestBody": {
"content": { "content": {
@ -326,8 +326,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Get a conversation with the given ID.", "summary": "Retrieve a conversation.",
"description": "Get a conversation with the given ID.", "description": "Retrieve a conversation.\nGet a conversation with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -369,8 +369,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Update a conversation's metadata with the given ID.", "summary": "Update a conversation.",
"description": "Update a conversation's metadata with the given ID.", "description": "Update a conversation.\nUpdate a conversation's metadata with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -422,8 +422,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Delete a conversation with the given ID.", "summary": "Delete a conversation.",
"description": "Delete a conversation with the given ID.", "description": "Delete a conversation.\nDelete a conversation with the given ID.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -467,8 +467,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "List items in the conversation.", "summary": "List items.",
"description": "List items in the conversation.", "description": "List items.\nList items in the conversation.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -597,8 +597,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Create items in the conversation.", "summary": "Create items.",
"description": "Create items in the conversation.", "description": "Create items.\nCreate items in the conversation.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -652,8 +652,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Retrieve a conversation item.", "summary": "Retrieve an item.",
"description": "Retrieve a conversation item.", "description": "Retrieve an item.\nRetrieve a conversation item.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -704,8 +704,8 @@
"tags": [ "tags": [
"Conversations" "Conversations"
], ],
"summary": "Delete a conversation item.", "summary": "Delete an item.",
"description": "Delete a conversation item.", "description": "Delete an item.\nDelete a conversation item.",
"parameters": [ "parameters": [
{ {
"name": "conversation_id", "name": "conversation_id",
@ -9272,6 +9272,10 @@
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
}, },
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
},
"input": { "input": {
"type": "array", "type": "array",
"items": { "items": {
@ -9820,6 +9824,10 @@
"usage": { "usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage", "$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response" "description": "(Optional) Token usage information for the response"
},
"instructions": {
"type": "string",
"description": "(Optional) System message inserted into the model's context"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -17928,8 +17936,8 @@
}, },
{ {
"name": "Conversations", "name": "Conversations",
"description": "", "description": "Protocol for conversation management operations.",
"x-displayName": "Protocol for conversation management operations." "x-displayName": "Conversations"
}, },
{ {
"name": "DatasetIO", "name": "DatasetIO",
@ -17941,8 +17949,8 @@
}, },
{ {
"name": "Eval", "name": "Eval",
"description": "", "description": "Llama Stack Evaluation API for running evaluations on model and agent candidates.",
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." "x-displayName": "Evaluations"
}, },
{ {
"name": "Files", "name": "Files",
View file
@ -195,7 +195,10 @@ paths:
tags: tags:
- Conversations - Conversations
summary: Create a conversation. summary: Create a conversation.
description: Create a conversation. description: >-
Create a conversation.
Create a conversation.
parameters: [] parameters: []
requestBody: requestBody:
content: content:
@ -225,8 +228,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Get a conversation with the given ID. summary: Retrieve a conversation.
description: Get a conversation with the given ID. description: >-
Retrieve a conversation.
Get a conversation with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -255,9 +261,10 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: >- summary: Update a conversation.
Update a conversation's metadata with the given ID.
description: >- description: >-
Update a conversation.
Update a conversation's metadata with the given ID. Update a conversation's metadata with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
@ -293,8 +300,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Delete a conversation with the given ID. summary: Delete a conversation.
description: Delete a conversation with the given ID. description: >-
Delete a conversation.
Delete a conversation with the given ID.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -324,8 +334,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: List items in the conversation. summary: List items.
description: List items in the conversation. description: >-
List items.
List items in the conversation.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -498,8 +511,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Create items in the conversation. summary: Create items.
description: Create items in the conversation. description: >-
Create items.
Create items in the conversation.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -535,8 +551,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Retrieve a conversation item. summary: Retrieve an item.
description: Retrieve a conversation item. description: >-
Retrieve an item.
Retrieve a conversation item.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -571,8 +590,11 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Conversations - Conversations
summary: Delete a conversation item. summary: Delete an item.
description: Delete a conversation item. description: >-
Delete an item.
Delete a conversation item.
parameters: parameters:
- name: conversation_id - name: conversation_id
in: path in: path
@ -7006,6 +7028,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
input: input:
type: array type: array
items: items:
@ -7409,6 +7435,10 @@ components:
$ref: '#/components/schemas/OpenAIResponseUsage' $ref: '#/components/schemas/OpenAIResponseUsage'
description: >- description: >-
(Optional) Token usage information for the response (Optional) Token usage information for the response
instructions:
type: string
description: >-
(Optional) System message inserted into the model's context
additionalProperties: false additionalProperties: false
required: required:
- created_at - created_at
@ -13533,17 +13563,17 @@ tags:
- name: Benchmarks - name: Benchmarks
description: '' description: ''
- name: Conversations - name: Conversations
description: '' description: >-
x-displayName: >-
Protocol for conversation management operations. Protocol for conversation management operations.
x-displayName: Conversations
- name: DatasetIO - name: DatasetIO
description: '' description: ''
- name: Datasets - name: Datasets
description: '' description: ''
- name: Eval - name: Eval
description: '' description: >-
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates. Llama Stack Evaluation API for running evaluations on model and agent candidates.
x-displayName: Evaluations
- name: Files - name: Files
description: >- description: >-
This API is used to upload documents that can be used with other Llama Stack This API is used to upload documents that can be used with other Llama Stack
View file
@ -78,17 +78,14 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
## Build, Configure, and Run Llama Stack ## Build, Configure, and Run Llama Stack
1. **Build the Llama Stack**: 1. **Install dependencies**:
Build the Llama Stack using the `starter` template:
```bash ```bash
uv run --with llama-stack llama stack build --distro starter --image-type venv llama stack list-deps starter | xargs -L1 uv pip install
``` ```
**Expected Output:**
2. **Start the distribution**:
```bash ```bash
... llama stack run starter
Build Successful!
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
``` ```
3. **Set the ENV variables by exporting them to the terminal**: 3. **Set the ENV variables by exporting them to the terminal**:
View file
@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
:param tools: (Optional) An array of tools the model may call while generating a response. :param tools: (Optional) An array of tools the model may call while generating a response.
:param truncation: (Optional) Truncation strategy applied to the response :param truncation: (Optional) Truncation strategy applied to the response
:param usage: (Optional) Token usage information for the response :param usage: (Optional) Token usage information for the response
:param instructions: (Optional) System message inserted into the model's context
""" """
created_at: int created_at: int
@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
tools: list[OpenAIResponseTool] | None = None tools: list[OpenAIResponseTool] | None = None
truncation: str | None = None truncation: str | None = None
usage: OpenAIResponseUsage | None = None usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@json_schema_type @json_schema_type
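For illustration, a hypothetical sketch of how the new `instructions` field could surface through the OpenAI-compatible Responses surface; the `client.responses.create` call shape, the `client` object, and `model_id` are assumptions for this example, not something established by this diff.

```python
# Hypothetical usage sketch: instructions passed on the request are echoed back
# on the response object via the new optional field.
response = client.responses.create(
    model=model_id,                             # assumed: any registered LLM identifier
    instructions="You are a terse assistant.",  # surfaces as response.instructions per the new schema field
    input="Say hello in one word.",
)
print(response.instructions)
```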
View file
@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
@runtime_checkable @runtime_checkable
@trace_protocol @trace_protocol
class Conversations(Protocol): class Conversations(Protocol):
"""Protocol for conversation management operations.""" """Conversations
Protocol for conversation management operations."""
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
async def create_conversation( async def create_conversation(
@ -181,6 +183,8 @@ class Conversations(Protocol):
) -> Conversation: ) -> Conversation:
"""Create a conversation. """Create a conversation.
Create a conversation.
:param items: Initial items to include in the conversation context. :param items: Initial items to include in the conversation context.
:param metadata: Set of key-value pairs that can be attached to an object. :param metadata: Set of key-value pairs that can be attached to an object.
:returns: The created conversation object. :returns: The created conversation object.
@ -189,7 +193,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_conversation(self, conversation_id: str) -> Conversation: async def get_conversation(self, conversation_id: str) -> Conversation:
"""Get a conversation with the given ID. """Retrieve a conversation.
Get a conversation with the given ID.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:returns: The conversation object. :returns: The conversation object.
@ -198,7 +204,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation: async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
"""Update a conversation's metadata with the given ID. """Update a conversation.
Update a conversation's metadata with the given ID.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:param metadata: Set of key-value pairs that can be attached to an object. :param metadata: Set of key-value pairs that can be attached to an object.
@ -208,7 +216,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource: async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
"""Delete a conversation with the given ID. """Delete a conversation.
Delete a conversation with the given ID.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:returns: The deleted conversation resource. :returns: The deleted conversation resource.
@ -217,7 +227,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList: async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
"""Create items in the conversation. """Create items.
Create items in the conversation.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:param items: Items to include in the conversation context. :param items: Items to include in the conversation context.
@ -227,7 +239,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem: async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
"""Retrieve a conversation item. """Retrieve an item.
Retrieve a conversation item.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:param item_id: The item identifier. :param item_id: The item identifier.
@ -244,7 +258,9 @@ class Conversations(Protocol):
limit: int | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN,
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
) -> ConversationItemList: ) -> ConversationItemList:
"""List items in the conversation. """List items.
List items in the conversation.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:param after: An item ID to list items after, used in pagination. :param after: An item ID to list items after, used in pagination.
@ -259,7 +275,9 @@ class Conversations(Protocol):
async def openai_delete_conversation_item( async def openai_delete_conversation_item(
self, conversation_id: str, item_id: str self, conversation_id: str, item_id: str
) -> ConversationItemDeletedResource: ) -> ConversationItemDeletedResource:
"""Delete a conversation item. """Delete an item.
Delete a conversation item.
:param conversation_id: The conversation identifier. :param conversation_id: The conversation identifier.
:param item_id: The item identifier. :param item_id: The item identifier.
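Taken together, the retitled operations map onto a lifecycle like the sketch below. It calls the protocol methods on some `conversations` object implementing this protocol (run inside an async context); the listing method name, the `conv.id` field, and the item payload shape are assumptions for illustration.

```python
# Sketch of the conversation lifecycle against the Conversations protocol declared above.
conv = await conversations.create_conversation(items=[], metadata={"topic": "demo"})
await conversations.add_items(conv.id, items=[{"type": "message", "role": "user", "content": "hi"}])
items = await conversations.list(conv.id, limit=10, order="desc")
await conversations.update_conversation(conv.id, metadata={"topic": "demo", "reviewed": "true"})
await conversations.openai_delete_conversation(conv.id)
```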
View file
@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol): class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates.""" """Evaluations
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True) @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
View file
@ -40,12 +40,20 @@ from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars from llama_stack.core.stack import replace_env_vars
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageConfig,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@ -286,21 +294,42 @@ def _generate_run_config(
Generate a run.yaml template file for user to edit from a build.yaml file Generate a run.yaml template file for user to edit from a build.yaml file
""" """
apis = list(build_config.distribution_spec.providers.keys()) apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
storage = StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
),
"sql_default": SqliteSqlStoreConfig(
db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(
backend="kv_default",
namespace="registry",
),
inference=InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
),
conversations=SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
),
),
)
run_config = StackRunConfig( run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None), container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name, image_name=image_name,
apis=apis, apis=apis,
providers={}, providers={},
storage=storage,
external_providers_dir=build_config.external_providers_dir external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR, else EXTERNAL_PROVIDERS_DIR,
) )
if not run_config.inference_store:
run_config.inference_store = SqliteSqlStoreConfig(
**SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
)
)
# build providers dict # build providers dict
provider_registry = get_provider_registry(build_config) provider_registry = get_provider_registry(build_config)
for api in apis: for api in apis:
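To inspect what the new defaults actually produce, the generated storage section can be dumped directly; a short sketch assuming the pydantic v2 `model_dump()` API used elsewhere in the codebase:

```python
import yaml

# Print the default storage layout that _generate_run_config now wires into run.yaml.
print(yaml.safe_dump(storage.model_dump(mode="json"), sort_keys=False))
```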
View file
@ -0,0 +1,182 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import sys
from pathlib import Path
import yaml
from termcolor import cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.core.build import get_provider_dependencies
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import replace_env_vars
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
logger = get_logger(name=__name__, category="cli")
# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.
SERVER_DEPENDENCIES = [
"aiosqlite",
"fastapi",
"fire",
"httpx",
"uvicorn",
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
]
def format_output_deps_only(
normal_deps: list[str],
special_deps: list[str],
external_deps: list[str],
uv: bool = False,
) -> str:
"""Format dependencies as a list."""
lines = []
uv_str = ""
if uv:
uv_str = "uv pip install "
# Quote deps with commas
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
for special_dep in special_deps:
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
for external_dep in external_deps:
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
return "\n".join(lines)
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
if args.config:
try:
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
except ValueError as e:
cprint(
f"Could not parse config file {args.config}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
if config_file:
with open(config_file) as f:
try:
contents = yaml.safe_load(f)
contents = replace_env_vars(contents)
build_config = BuildConfig(**contents)
build_config.image_type = "venv"
except Exception as e:
cprint(
f"Could not parse config file {config_file}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
elif args.providers:
provider_list: dict[str, list[BuildProvider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
provider = BuildProvider(
provider_type=provider_type,
module=None,
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider_type} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
distribution_spec = DistributionSpec(
providers=provider_list,
description=",".join(args.providers),
)
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
normal_deps += SERVER_DEPENDENCIES
# Add external API dependencies
if build_config.external_apis_dir:
from llama_stack.core.external import load_external_apis
external_apis = load_external_apis(build_config)
if external_apis:
for _, api_spec in external_apis.items():
normal_deps.extend(api_spec.pip_packages)
# Format and output based on requested format
output = format_output_deps_only(
normal_deps=normal_deps,
special_deps=special_deps,
external_deps=external_provider_dependencies,
uv=args.format == "uv",
)
print(output)
def quote_if_needed(dep):
# Add quotes if the dependency contains special characters that need escaping in shell
# This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
needs_quoting = any(char in dep for char in [",", "<", ">", "="])
return f"'{dep}'" if needs_quoting else dep
def quote_special_dep(dep_string):
"""
Quote individual packages in a special dependency string.
Special deps may contain multiple packages and flags like --extra-index-url.
We need to quote only the package specs that contain special characters.
"""
parts = dep_string.split()
quoted_parts = []
for part in parts:
# Don't quote flags (they start with -)
if part.startswith("-"):
quoted_parts.append(part)
else:
# Quote package specs that need it
quoted_parts.append(quote_if_needed(part))
return " ".join(quoted_parts)
View file
@ -8,6 +8,9 @@ import textwrap
from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.subcommand import Subcommand from llama_stack.cli.subcommand import Subcommand
from llama_stack.log import get_logger
logger = get_logger(__name__, category="cli")
class StackBuild(Subcommand): class StackBuild(Subcommand):
@ -16,7 +19,7 @@ class StackBuild(Subcommand):
self.parser = subparsers.add_parser( self.parser = subparsers.add_parser(
"build", "build",
prog="llama stack build", prog="llama stack build",
description="Build a Llama stack container", description="[DEPRECATED] Build a Llama stack container. This command is deprecated and will be removed in a future release. Use `llama stack list-deps <distro>' instead.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter,
) )
self._add_arguments() self._add_arguments()
@ -93,6 +96,9 @@ the build. If not specified, currently active environment will be used if found.
) )
def _run_stack_build_command(self, args: argparse.Namespace) -> None: def _run_stack_build_command(self, args: argparse.Namespace) -> None:
logger.warning(
"The 'llama stack build' command is deprecated and will be removed in a future release. Please use 'llama stack list-deps'"
)
# always keep implementation completely silo-ed away from CLI so CLI # always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies # can be fast to load and reduces dependencies
from ._build import run_stack_build_command from ._build import run_stack_build_command
View file
@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from llama_stack.cli.subcommand import Subcommand
class StackListDeps(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list-deps",
prog="llama stack list-deps",
description="list the dependencies for a llama stack distribution",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_stack_list_deps_command)
def _add_arguments(self):
self.parser.add_argument(
"config",
type=str,
nargs="?", # Make it optional
metavar="config | distro",
help="Path to config file to use or name of known distro (llama stack list for a list).",
)
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
)
self.parser.add_argument(
"--format",
type=str,
choices=["uv", "deps-only"],
default="deps-only",
help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
)
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
from ._list_deps import run_stack_list_deps_command
return run_stack_list_deps_command(args)
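For illustration only (the real handling lives in `_list_deps`, which is not shown here), the `--providers` value is a comma-separated list of `api=provider` pairs; a parser for that shape might look roughly like this, with illustrative provider names:

def parse_providers_arg(value: str) -> dict[str, list[str]]:
    # Split "api1=provider1,api2=provider2" into {"api1": ["provider1"], "api2": ["provider2"]}.
    providers: dict[str, list[str]] = {}
    for pair in value.split(","):
        api, _, provider = pair.partition("=")
        providers.setdefault(api.strip(), []).append(provider.strip())
    return providers

print(parse_providers_arg("inference=remote::ollama,vector_io=inline::faiss"))
# {'inference': ['remote::ollama'], 'vector_io': ['inline::faiss']}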


@ -13,6 +13,7 @@ from llama_stack.cli.subcommand import Subcommand
from .build import StackBuild from .build import StackBuild
from .list_apis import StackListApis from .list_apis import StackListApis
from .list_deps import StackListDeps
from .list_providers import StackListProviders from .list_providers import StackListProviders
from .remove import StackRemove from .remove import StackRemove
from .run import StackRun from .run import StackRun
@ -39,6 +40,7 @@ class StackParser(Subcommand):
subparsers = self.parser.add_subparsers(title="stack_subcommands") subparsers = self.parser.add_subparsers(title="stack_subcommands")
# Add sub-commands # Add sub-commands
StackListDeps.create(subparsers)
StackBuild.create(subparsers) StackBuild.create(subparsers)
StackListApis.create(subparsers) StackListApis.create(subparsers)
StackListProviders.create(subparsers) StackListProviders.create(subparsers)


@ -4,7 +4,37 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import json
import sys
from enum import Enum from enum import Enum
from functools import lru_cache
from pathlib import Path
import yaml
from termcolor import cprint
from llama_stack.core.datatypes import (
BuildConfig,
Provider,
StackRunConfig,
StorageConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
class ImageType(Enum): class ImageType(Enum):
@ -19,3 +49,103 @@ def print_subcommand_description(parser, subparsers):
description = subcommand.description description = subcommand.description
description_text += f" {name:<21} {description}\n" description_text += f" {name:<21} {description}\n"
parser.epilog = description_text parser.epilog = description_text
def generate_run_config(
build_config: BuildConfig,
build_dir: Path,
image_name: str,
) -> Path:
"""
Generate a run.yaml template file (for the user to edit) from a build.yaml file
"""
apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name,
apis=apis,
providers={},
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR,
)
# build providers dict
provider_registry = get_provider_registry(build_config)
for api in apis:
run_config.providers[api] = []
providers = build_config.distribution_spec.providers[api]
for provider in providers:
pid = provider.provider_type.split("::")[-1]
p = provider_registry[Api(api)][provider.provider_type]
if p.deprecation_error:
raise InvalidProviderError(p.deprecation_error)
try:
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
except (ModuleNotFoundError, ValueError) as exc:
# HACK ALERT:
# This code executes after the build is done; the import cannot work because the
# package is only available inside the venv or container, not on the host.
# TODO: use an "is_external" flag in ProviderSpec to check whether the provider
# is external
cprint(
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
color="yellow",
file=sys.stderr,
)
# Set config_type to None to avoid UnboundLocalError
config_type = None
if config_type is not None and hasattr(config_type, "sample_run_config"):
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
else:
config = {}
p_spec = Provider(
provider_id=pid,
provider_type=provider.provider_type,
config=config,
module=provider.module,
)
run_config.providers[api].append(p_spec)
run_config_file = build_dir / f"{image_name}-run.yaml"
with open(run_config_file, "w") as f:
to_write = json.loads(run_config.model_dump_json())
f.write(yaml.dump(to_write, sort_keys=False))
# Only print this message for non-container builds; for container builds it would be
# displayed before the container is actually built.
# For non-container builds, the run.yaml is generated at the very end of the build
# process, so it makes sense to display the message here.
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
return run_config_file
@lru_cache
def available_templates_specs() -> dict[str, BuildConfig]:
import yaml
template_specs = {}
for p in TEMPLATES_PATH.rglob("*build.yaml"):
template_name = p.parent.name
with open(p) as f:
build_config = BuildConfig(**yaml.safe_load(f))
template_specs[template_name] = build_config
return template_specs
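A rough usage sketch for the two helpers above, assuming it runs inside this module (so `Path` and the helpers are already in scope) and that the selected distro's provider packages are importable; the distro name and build directory are illustrative:

templates = available_templates_specs()           # e.g. {"starter": BuildConfig(...), ...}
build_config = templates["starter"]
run_yaml_path = generate_run_config(build_config, Path("/tmp/starter-build"), image_name="starter")
print(run_yaml_path)                              # /tmp/starter-build/starter-run.yaml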


@ -159,6 +159,37 @@ def upgrade_from_routing_table(
config_dict["apis"] = config_dict["apis_to_serve"] config_dict["apis"] = config_dict["apis_to_serve"]
config_dict.pop("apis_to_serve", None) config_dict.pop("apis_to_serve", None)
# Add default storage config if not present
if "storage" not in config_dict:
config_dict["storage"] = {
"backends": {
"kv_default": {
"type": "kv_sqlite",
"db_path": "~/.llama/kvstore.db",
},
"sql_default": {
"type": "sql_sqlite",
"db_path": "~/.llama/sql_store.db",
},
},
"stores": {
"metadata": {
"namespace": "registry",
"backend": "kv_default",
},
"inference": {
"table_name": "inference_store",
"backend": "sql_default",
"max_write_queue_size": 10000,
"num_writers": 4,
},
"conversations": {
"table_name": "openai_conversations",
"backend": "sql_default",
},
},
}
return config_dict return config_dict


@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import os
import secrets import secrets
import time import time
from typing import Any from typing import Any
@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
Conversations, Conversations,
Metadata, Metadata,
) )
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import ( from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
SqliteSqlStoreConfig,
SqlStoreConfig,
sqlstore_impl,
)
logger = get_logger(name=__name__, category="openai_conversations") logger = get_logger(name=__name__, category="openai_conversations")
@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel): class ConversationServiceConfig(BaseModel):
"""Configuration for the built-in conversation service. """Configuration for the built-in conversation service.
:param conversations_store: SQL store configuration for conversations (defaults to SQLite) :param run_config: Stack run configuration for resolving persistence
:param policy: Access control rules :param policy: Access control rules
""" """
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig( run_config: StackRunConfig
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
)
policy: list[AccessRule] = [] policy: list[AccessRule] = []
@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
self.deps = deps self.deps = deps
self.policy = config.policy self.policy = config.policy
base_sql_store = sqlstore_impl(config.conversations_store) # Use conversations store reference from run config
conversations_ref = config.run_config.storage.stores.conversations
if not conversations_ref:
raise ValueError("storage.stores.conversations must be configured in run config")
base_sql_store = sqlstore_impl(conversations_ref)
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
async def initialize(self) -> None: async def initialize(self) -> None:
"""Initialize the store and create tables.""" """Initialize the store and create tables."""
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
await self.sql_store.create_table( await self.sql_store.create_table(
"openai_conversations", "openai_conversations",
{ {


@ -26,9 +26,12 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
StorageBackendType,
StorageConfig,
)
from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2
@ -382,7 +385,7 @@ class QuotaPeriod(StrEnum):
class QuotaConfig(BaseModel): class QuotaConfig(BaseModel):
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
authenticated_max_requests: int = Field( authenticated_max_requests: int = Field(
default=1000, description="Max requests for authenticated clients per period" default=1000, description="Max requests for authenticated clients per period"
@ -464,18 +467,6 @@ class ServerConfig(BaseModel):
) )
class InferenceStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class ResponsesStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class StackRunConfig(BaseModel): class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@ -502,26 +493,8 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
can be instantiated multiple times (with different configs) if necessary. can be instantiated multiple times (with different configs) if necessary.
""", """,
) )
metadata_store: KVStoreConfig | None = Field( storage: StorageConfig = Field(
default=None, description="Catalog of named storage backends and references available to the stack",
description="""
Configuration for the persistence store used by the distribution registry. If not specified,
a default SQLite store will be used.""",
)
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the inference API. Can be either a
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
If not specified, a default SQLite store will be used.""",
)
conversations_store: SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the conversations API.
If not specified, a default SQLite store will be used.""",
) )
# registry of "resources" in the distribution # registry of "resources" in the distribution
@ -566,6 +539,49 @@ If not specified, a default SQLite store will be used.""",
return Path(v) return Path(v)
return v return v
@model_validator(mode="after")
def validate_server_stores(self) -> "StackRunConfig":
backend_map = self.storage.backends
stores = self.storage.stores
kv_backends = {
name
for name, cfg in backend_map.items()
if cfg.type
in {
StorageBackendType.KV_REDIS,
StorageBackendType.KV_SQLITE,
StorageBackendType.KV_POSTGRES,
StorageBackendType.KV_MONGODB,
}
}
sql_backends = {
name
for name, cfg in backend_map.items()
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
}
def _ensure_backend(reference, expected_set, store_name: str) -> None:
if reference is None:
return
backend_name = reference.backend
if backend_name not in backend_map:
raise ValueError(
f"{store_name} references unknown backend '{backend_name}'. "
f"Available backends: {sorted(backend_map)}"
)
if backend_name not in expected_set:
raise ValueError(
f"{store_name} references backend '{backend_name}' of type "
f"'{backend_map[backend_name].type.value}', but a backend of type "
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
)
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
return self
class BuildConfig(BaseModel): class BuildConfig(BaseModel):
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
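For illustration, a misconfigured reference that the `validate_server_stores` check above rejects (a metadata store pointing at a SQL backend); the classes come from the storage datatypes module added later in this diff, and the path is illustrative:

from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteSqlStoreConfig,
    StorageConfig,
)

# Only a SQL backend is defined, but the metadata store requires a kv_* backend.
bad_storage = StorageConfig(
    backends={"sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql_store.db")},
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="sql_default", namespace="registry"),
    ),
)
# Constructing a StackRunConfig with storage=bad_storage fails validation with a message like:
#   storage.stores.metadata references backend 'sql_default' of type 'sql_sqlite',
#   but a backend of type kv_* is required.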


@ -11,9 +11,8 @@ from pydantic import BaseModel
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
class PromptServiceConfig(BaseModel): class PromptServiceConfig(BaseModel):
@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
self.kvstore: KVStore self.kvstore: KVStore
async def initialize(self) -> None: async def initialize(self) -> None:
kvstore_config = SqliteKVStoreConfig( # Use metadata store backend with prompts-specific namespace
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix() metadata_ref = self.config.run_config.storage.stores.metadata
) if not metadata_ref:
self.kvstore = await kvstore_impl(kvstore_config) raise ValueError("storage.stores.metadata must be configured in run config")
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
self.kvstore = await kvstore_impl(prompts_ref)
def _get_default_key(self, prompt_id: str) -> str: def _get_default_key(self, prompt_id: str) -> str:
"""Get the KVStore key that stores the default version number.""" """Get the KVStore key that stores the default version number."""


@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import importlib import importlib
import importlib.metadata
import inspect import inspect
from typing import Any from typing import Any


@ -6,7 +6,10 @@
from typing import Any from typing import Any
from llama_stack.core.datatypes import AccessRule, RoutedProtocol from llama_stack.core.datatypes import (
AccessRule,
RoutedProtocol,
)
from llama_stack.core.stack import StackRunConfig from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.datatypes import Api, RoutingTable
@ -78,9 +81,13 @@ async def get_auto_router_impl(
api_to_dep_impl[dep_name] = deps[dep_api] api_to_dep_impl[dep_name] = deps[dep_api]
# TODO: move pass configs to routers instead # TODO: move pass configs to routers instead
if api == Api.inference and run_config.inference_store: if api == Api.inference:
inference_ref = run_config.storage.stores.inference
if not inference_ref:
raise ValueError("storage.stores.inference must be configured in run config")
inference_store = InferenceStore( inference_store = InferenceStore(
config=run_config.inference_store, reference=inference_ref,
policy=policy, policy=policy,
) )
await inference_store.initialize() await inference_store.initialize()


@ -72,13 +72,30 @@ class AuthProvider(ABC):
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]: def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {} attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items(): for claim_key, attribute_key in mapping.items():
if claim_key not in claims: # First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
# Then fall back to literal key with dots (e.g., "my.dotted.key")
claim: object = claims
keys = claim_key.split(".")
for key in keys:
if isinstance(claim, dict) and key in claim:
claim = claim[key]
else:
claim = None
break
if claim is None and claim_key in claims:
# Fall back to checking if claim_key exists as a literal key
claim = claims[claim_key]
if claim is None:
continue continue
claim = claims[claim_key]
if isinstance(claim, list): if isinstance(claim, list):
values = claim values = claim
else: elif isinstance(claim, str):
values = claim.split() values = claim.split()
else:
continue
if attribute_key in attributes: if attribute_key in attributes:
attributes[attribute_key].extend(values) attributes[attribute_key].extend(values)
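A small sanity check of the lookup order this change introduces (dotted traversal first, then the literal key), with made-up claim names and `get_attributes_from_claims` as defined above:

claims = {
    "resource_access": {"llamastack": {"roles": ["admin", "user"]}},
    "my.dotted.key": "teamA teamB",
}
mapping = {"resource_access.llamastack.roles": "roles", "my.dotted.key": "teams"}
print(get_attributes_from_claims(claims, mapping))
# {'roles': ['admin', 'user'], 'teams': ['teamA', 'teamB']}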


@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta
from starlette.types import ASGIApp, Receive, Scope, Send from starlette.types import ASGIApp, Receive, Scope, Send
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
logger = get_logger(name=__name__, category="core::server") logger = get_logger(name=__name__, category="core::server")
@ -33,7 +33,7 @@ class QuotaMiddleware:
def __init__( def __init__(
self, self,
app: ASGIApp, app: ASGIApp,
kv_config: KVStoreConfig, kv_config: KVStoreReference,
anonymous_max_requests: int, anonymous_max_requests: int,
authenticated_max_requests: int, authenticated_max_requests: int,
window_seconds: int = 86400, window_seconds: int = 86400,
@ -45,15 +45,15 @@ class QuotaMiddleware:
self.authenticated_max_requests = authenticated_max_requests self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds self.window_seconds = window_seconds
if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
async def _get_kv(self) -> KVStore: async def _get_kv(self) -> KVStore:
if self.kv is None: if self.kv is None:
self.kv = await kvstore_impl(self.kv_config) self.kv = await kvstore_impl(self.kv_config)
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
return self.kv return self.kv
async def __call__(self, scope: Scope, receive: Receive, send: Send): async def __call__(self, scope: Scope, receive: Receive, send: Send):


@ -42,6 +42,16 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceI
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageBackendConfig,
StorageConfig,
)
from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -357,6 +367,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
impls[Api.conversations] = conversations_impl impls[Api.conversations] = conversations_impl
def _initialize_storage(run_config: StackRunConfig):
kv_backends: dict[str, StorageBackendConfig] = {}
sql_backends: dict[str, StorageBackendConfig] = {}
for backend_name, backend_config in run_config.storage.backends.items():
type = backend_config.type.value
if type.startswith("kv_"):
kv_backends[backend_name] = backend_config
elif type.startswith("sql_"):
sql_backends[backend_name] = backend_config
else:
raise ValueError(f"Unknown storage backend type: {type}")
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
register_kvstore_backends(kv_backends)
register_sqlstore_backends(sql_backends)
class Stack: class Stack:
def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None): def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
self.run_config = run_config self.run_config = run_config
@ -375,7 +404,11 @@ class Stack:
TEST_RECORDING_CONTEXT.__enter__() TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) _initialize_storage(self.run_config)
stores = self.run_config.storage.stores
if not stores.metadata:
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
internal_impls = {} internal_impls = {}
@ -516,5 +549,16 @@ def run_config_from_adhoc_config_spec(
image_name="distro-test", image_name="distro-test",
apis=list(provider_configs_by_api.keys()), apis=list(provider_configs_by_api.keys()),
providers=provider_configs_by_api, providers=provider_configs_by_api,
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
"sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
) )
return config return config


@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal
from pydantic import BaseModel, Field, field_validator
class StorageBackendType(StrEnum):
KV_REDIS = "kv_redis"
KV_SQLITE = "kv_sqlite"
KV_POSTGRES = "kv_postgres"
KV_MONGODB = "kv_mongodb"
SQL_SQLITE = "sql_sqlite"
SQL_POSTGRES = "sql_postgres"
class CommonConfig(BaseModel):
namespace: str | None = Field(
default=None,
description="All keys will be prefixed with this namespace",
)
class RedisKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
host: str = "localhost"
port: int = 6379
@property
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": StorageBackendType.KV_REDIS.value,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
class SqliteKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
db_path: str = Field(
description="File path for the sqlite database",
)
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": StorageBackendType.KV_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
class PostgresKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
ssl_mode: str | None = None
ca_cert_path: str | None = None
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": StorageBackendType.KV_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
}
@classmethod
@field_validator("table_name")
def validate_table_name(cls, v: str) -> str:
# PostgreSQL identifiers rules:
# - Must start with a letter or underscore
# - Can contain letters, numbers, and underscores
# - Maximum length is 63 bytes
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
if not re.match(pattern, v):
raise ValueError(
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
)
if len(v) > 63:
raise ValueError("Table name must be less than 63 characters")
return v
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
class MongoDBKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": StorageBackendType.KV_MONGODB.value,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
}
class SqlAlchemySqlStoreConfig(BaseModel):
@property
@abstractmethod
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
db_path: str = Field(
description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
)
@property
def engine_str(self) -> str:
return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return {
"type": StorageBackendType.SQL_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
@property
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return {
"type": StorageBackendType.SQL_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
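For reference, a quick sketch of the engine strings these two configs produce when used from this module (values illustrative; the sqlite path is expanded to an absolute path):

print(SqliteSqlStoreConfig(db_path="~/.llama/sqlstore.db").engine_str)
# e.g. sqlite+aiosqlite:////home/<user>/.llama/sqlstore.db
print(PostgresSqlStoreConfig(user="llamastack", password="secret").engine_str)
# postgresql+asyncpg://llamastack:secret@localhost:5432/llamastack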
# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
"""A reference to a 'SQL-like' persistent store. A table name must be provided."""
table_name: str = Field(
description="Name of the table to use for the SqlStore",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
"""A reference to a 'key-value' persistent store. A namespace must be provided."""
namespace: str = Field(
description="Key prefix for KVStore backends",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
StorageBackendConfig = Annotated[
RedisKVStoreConfig
| SqliteKVStoreConfig
| PostgresKVStoreConfig
| MongoDBKVStoreConfig
| SqliteSqlStoreConfig
| PostgresSqlStoreConfig,
Field(discriminator="type"),
]
class InferenceStoreReference(SqlStoreReference):
"""Inference store configuration with queue tuning."""
max_write_queue_size: int = Field(
default=10000,
description="Max queued writes for inference store",
)
num_writers: int = Field(
default=4,
description="Number of concurrent background writers",
)
class ResponsesStoreReference(InferenceStoreReference):
"""Responses store configuration with queue tuning."""
class ServerStoresConfig(BaseModel):
metadata: KVStoreReference | None = Field(
default=None,
description="Metadata store configuration (uses KV backend)",
)
inference: InferenceStoreReference | None = Field(
default=None,
description="Inference store configuration (uses SQL backend)",
)
conversations: SqlStoreReference | None = Field(
default=None,
description="Conversations store configuration (uses SQL backend)",
)
responses: ResponsesStoreReference | None = Field(
default=None,
description="Responses store configuration (uses SQL backend)",
)
class StorageConfig(BaseModel):
backends: dict[str, StorageBackendConfig] = Field(
description="Named backend configurations (e.g., 'default', 'cache')",
)
stores: ServerStoresConfig = Field(
default_factory=lambda: ServerStoresConfig(),
description="Named references to storage backends used by the stack core",
)
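Putting the pieces together, a storage block equivalent to the defaults generated elsewhere in this change might be constructed like this (paths illustrative):

from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

storage = StorageConfig(
    backends={
        "kv_default": SqliteKVStoreConfig(db_path="~/.llama/distributions/starter/kvstore.db"),
        "sql_default": SqliteSqlStoreConfig(db_path="~/.llama/distributions/starter/sql_store.db"),
    },
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
        conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
    ),
)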


@ -11,10 +11,9 @@ from typing import Protocol
import pydantic import pydantic
from llama_stack.core.datatypes import RoutableObjectWithProvider from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core::registry") logger = get_logger(__name__, category="core::registry")
@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
async def create_dist_registry( async def create_dist_registry(
metadata_store: KVStoreConfig | None, metadata_store: KVStoreReference, image_name: str
image_name: str,
) -> tuple[CachedDiskDistributionRegistry, KVStore]: ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
# instantiate kvstore for storing and retrieving distribution metadata # instantiate kvstore for storing and retrieving distribution metadata
if metadata_store: dist_kvstore = await kvstore_impl(metadata_store)
dist_kvstore = await kvstore_impl(metadata_store)
else:
dist_kvstore = await kvstore_impl(
SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
)
dist_registry = CachedDiskDistributionRegistry(dist_kvstore) dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
await dist_registry.initialize() await dist_registry.initialize()
return dist_registry, dist_kvstore return dist_registry, dist_kvstore


@ -42,25 +42,25 @@ def resolve_config_or_distro(
# Strategy 1: Try as file path first # Strategy 1: Try as file path first
config_path = Path(config_or_distro) config_path = Path(config_or_distro)
if config_path.exists() and config_path.is_file(): if config_path.exists() and config_path.is_file():
logger.info(f"Using file path: {config_path}") logger.debug(f"Using file path: {config_path}")
return config_path.resolve() return config_path.resolve()
# Strategy 2: Try as distribution name (if no .yaml extension) # Strategy 2: Try as distribution name (if no .yaml extension)
if not config_or_distro.endswith(".yaml"): if not config_or_distro.endswith(".yaml"):
distro_config = _get_distro_config_path(config_or_distro, mode) distro_config = _get_distro_config_path(config_or_distro, mode)
if distro_config.exists(): if distro_config.exists():
logger.info(f"Using distribution: {distro_config}") logger.debug(f"Using distribution: {distro_config}")
return distro_config return distro_config
# Strategy 3: Try as built distribution name # Strategy 3: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists(): if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}") logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config return distrib_config
distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml" distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists(): if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}") logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config return distrib_config
# Strategy 4: Failed - provide helpful error # Strategy 4: Failed - provide helpful error

Some files were not shown because too many files have changed in this diff.