Merge branch 'main' into milvus-files-api

Commit c4fa7ab978: Francisco Arceo, 2025-07-03 14:03:51 -04:00, committed by GitHub
74 changed files with 1001 additions and 348 deletions

View file

@ -25,7 +25,7 @@ jobs:
# Listing tests manually since some of them currently fail
# TODO: generate matrix list from tests/integration when fixed
test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io]
client-type: [library, http]
client-type: [library, server]
python-version: ["3.12", "3.13"]
fail-fast: false # we want to run all tests regardless of failure
@ -45,39 +45,6 @@ jobs:
run: |
uv run llama stack build --template ollama --image-type venv
- name: Start Llama Stack server in background
if: matrix.client-type == 'http'
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
- name: Wait for Llama Stack server to be ready
if: matrix.client-type == 'http'
run: |
echo "Waiting for Llama Stack server..."
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo "Llama Stack server is up!"
exit 0
fi
sleep 1
done
echo "Llama Stack server failed to start"
cat server.log
exit 1
- name: Verify Ollama status is OK
if: matrix.client-type == 'http'
run: |
echo "Verifying Ollama status..."
ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
echo "Ollama status: $ollama_status"
if [ "$ollama_status" != "OK" ]; then
echo "Ollama health check failed"
exit 1
fi
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
@ -92,12 +59,14 @@ jobs:
if [ "${{ matrix.client-type }}" == "library" ]; then
stack_config="ollama"
else
stack_config="http://localhost:8321"
stack_config="server:ollama"
fi
uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
--text-model="meta-llama/Llama-3.2-3B-Instruct" \
--embedding-model=all-MiniLM-L6-v2
--embedding-model=all-MiniLM-L6-v2 \
--color=yes \
--capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}

View file

@ -35,6 +35,8 @@ pip install llama-stack-client
### CLI
```bash
# Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"
llama-stack-client --endpoint http://localhost:8321 \
inference chat-completion \
--model-id meta-llama/$MODEL \
@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html).
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | |
| SambaNova | Hosted | | ✅ | | ✅ | | |
| Cerebras | Hosted | | ✅ | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | |
| Groq | Hosted | | ✅ | | | | |
| Ollama | Single Node | | ✅ | | | | |
| TGI | Hosted and Single Node | | ✅ | | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | |
| Chroma | Single Node | | | ✅ | | | |
| PG Vector | Single Node | | | ✅ | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | |
| vLLM | Hosted and Single Node | | ✅ | | | | |
| OpenAI | Hosted | | ✅ | | | | |
| Anthropic | Hosted | | ✅ | | | | |
| Gemini | Hosted | | ✅ | | | | |
| watsonx | Hosted | | ✅ | | | | |
| HuggingFace | Single Node | | | | | | ✅ |
| TorchTune | Single Node | | | | | | ✅ |
| NVIDIA NEMO | Hosted | | | | | | ✅ |
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
### Distributions
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support:
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code.
Here are some of the distributions we support:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
Here are the ones that are out of support scope but still available on Docker Hub:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) |
| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
### Documentation

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n"
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

docs/quick_start.ipynb (new file, 367 lines)
View file

@ -0,0 +1,367 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c1e7571c",
"metadata": {
"id": "c1e7571c"
},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
"\n",
"# Llama Stack - Building AI Applications\n",
"\n",
"<img src=\"https://llama-stack.readthedocs.io/en/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
"\n",
"Get started with Llama Stack in minutes!\n",
"\n",
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
"\n",
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
]
},
{
"cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and test inference with Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Collab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"Well now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be ran in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os \n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
" \n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
" \n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
" \n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
" \n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first LLM and first embedding models\n",
"model_id = next(m for m in models if m.model_type == \"llm\").identifier\n",
"embedding_model_id = (\n",
" em := next(m for m in models if m.model_type == \"embedding\")\n",
").identifier\n",
"embedding_dimension = em.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -80,7 +80,7 @@ providers:
# provider_id is a string you can choose freely
- provider_id: ollama
# provider_type is a string that specifies the type of provider.
# in this case, the provider for inference is ollama and it is run remotely (outside of the distribution)
# in this case, the provider for inference is ollama and it runs remotely (outside of the distribution)
provider_type: remote::ollama
# config is a dictionary that contains the configuration for the provider.
# in this case, the configuration is the url of the ollama server
@ -88,7 +88,7 @@ providers:
url: ${env.OLLAMA_URL:=http://localhost:11434}
```
A few things to note:
- A _provider instance_ is identified with an (id, type, configuration) triplet.
- A _provider instance_ is identified with an (id, type, config) triplet.
- The id is a string you can choose freely.
- You can instantiate any number of provider instances of the same type.
- The configuration dictionary is provider-specific.
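To make the triplet concrete, here is a minimal sketch of two provider instances of the same type; the second instance id and its URL are hypothetical:
```yaml
inference:
  # two instances of the same provider type, distinguished only by provider_id
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  - provider_id: ollama-secondary   # hypothetical second instance
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_SECONDARY_URL:=http://localhost:11435}
```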
@ -187,7 +187,7 @@ The environment variable substitution system is type-safe:
## Resources
Finally, let's look at the `models` section:
Let's look at the `models` section:
```yaml
models:
@ -195,8 +195,9 @@ models:
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
model_type: llm
```
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models.
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
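A minimal sketch of the renaming described above, using the same fields as the `models` entry shown earlier (the values are illustrative):
```yaml
models:
  - model_id: image_captioning_model        # the identifier you use in Stack interactions
    provider_id: ollama
    provider_model_id: llama3.2:vision-11b  # the identifier in the provider's model catalog
    model_type: llm
```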

View file

@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a
In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
as the inference [provider](../providers/inference/index) for a Llama Model.
**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb)
#### Step 1: Install and setup
1. Install [uv](https://docs.astral.sh/uv/)
2. Run inference on a Llama model with [Ollama](https://ollama.com/download)
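A minimal command sketch for these two steps, assuming a Unix-like shell with `curl` available (the uv installer URL is the one documented by Astral; the model tag matches the quickstart notebook):
```bash
# Step 1: install uv (see https://docs.astral.sh/uv/ for other install methods)
curl -LsSf https://astral.sh/uv/install.sh | sh

# Step 2: pull a Llama model and verify inference with Ollama
ollama run llama3.2:3b --keepalive 60m
```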

View file

@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store
| OpenAI | Hosted |
| Anthropic | Hosted |
| Gemini | Hosted |
| WatsonX | Hosted |
**Agents API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| Fireworks | Hosted |
| Together | Hosted |
| PyTorch ExecuTorch | On-device iOS |
**Vector IO API**
| **Provider** | **Environments** |
| :----: | :----: |
| FAISS | Single Node |
| SQLite-Vec| Single Node |
| SQLite-Vec | Single Node |
| Chroma | Hosted and Single Node |
| Milvus | Hosted and Single Node |
| Postgres (PGVector) | Hosted and Single Node |
| Weaviate | Hosted |
| Qdrant | Hosted and Single Node |
**Safety API**
| **Provider** | **Environments** |
@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Code Scanner | Single Node |
| AWS Bedrock | Hosted |
**Post Training API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| HuggingFace | Single Node |
| TorchTune | Single Node |
| NVIDIA NEMO | Hosted |
**Eval API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| NVIDIA NEMO | Hosted |
**Telemetry API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
**Tool Runtime API**
| **Provider** | **Environments** |
| :----: | :----: |
| Brave Search | Hosted |
| RAG Runtime | Single Node |
```{toctree}
:hidden:

View file

@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve
```yaml
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
type: sqlite

View file

@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
```

View file

@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of evaluation tasks with support for multiple la
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
```

View file

@ -1,9 +1,10 @@
# Providers Overview
The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.),
- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.),
- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.)
- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),
- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.),
- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.),
- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.)
Providers come in two flavors:
- **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code.

View file

@ -44,7 +44,6 @@ more details about Faiss in general.
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of a vector database.
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

View file

@ -19,7 +19,6 @@ Please refer to the remote provider documentation.
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
```

View file

@ -36,7 +36,7 @@
"from dotenv import load_dotenv\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.lib.agents.client_tool import ClientTool\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
@ -129,7 +129,7 @@
"source": [
"## Step 3: Create a Custom Tool Class\n",
"\n",
"Here, we defines the `WebSearchTool` class, which extends `CustomTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation."
"Here, we defines the `WebSearchTool` class, which extends `ClientTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation."
]
},
{
@ -139,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
"class WebSearchTool(CustomTool):\n",
"class WebSearchTool(ClientTool):\n",
" def __init__(self, api_key: str):\n",
" self.api_key = api_key\n",
" self.engine = BraveSearch(api_key)\n",

View file

@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@ -65,7 +65,7 @@
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
"TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n"
]
},
{
@ -110,10 +110,17 @@
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import UserMessage\n",
"from typing import cast, Iterator\n",
"\n",
"\n",
"async def agent_example():\n",
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
" client = LlamaStackClient(\n",
" base_url=f\"http://{HOST}:{PORT}\",\n",
" provider_data={\n",
" \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n",
" }\n",
" )\n",
" agent = Agent(\n",
" client,\n",
" model=MODEL_NAME,\n",
@ -123,13 +130,7 @@
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" {\n",
" \"type\": \"brave_search\",\n",
" \"engine\": \"brave\",\n",
" \"api_key\": BRAVE_SEARCH_API_KEY,\n",
" }\n",
" ],\n",
" tools=[\"builtin::websearch\"],\n",
" )\n",
" session_id = agent.create_session(\"test-session\")\n",
" print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n",
@ -142,15 +143,13 @@
" for prompt in user_prompts:\n",
" response = agent.create_turn(\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": prompt,\n",
" }\n",
" UserMessage(role=\"user\", content=prompt)\n",
" ],\n",
" session_id=session_id,\n",
" stream=True,\n",
" )\n",
"\n",
" async for log in EventLogger().log(response):\n",
" for log in EventLogger().log(cast(Iterator, response)):\n",
" log.print()\n",
"\n",
"\n",

View file

@ -2,9 +2,9 @@
Llama Stack defines and standardizes the set of core building blocks needed to bring generative AI applications to market. These building blocks are presented in the form of interoperable APIs with a broad set of Providers providing their implementations. These building blocks are assembled into Distributions which are easy for developers to get from zero to production.
This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the memory provider. Please note the steps for configuring your provider and distribution will vary a little depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack.
This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the VectorIO provider. Please note the steps for configuring your provider and distribution will vary depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack.
If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from Tool Calling to Agents in detail. Feel free to skip to the end to explore the advanced topics you're interested in.
If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from 'Tool Calling' to 'Agents' in detail. Feel free to skip to the end to explore the advanced topics you're interested in.
> If you'd prefer not to set up a local server, explore our notebook on [tool calling with the Together API](Tool_Calling101_Using_Together_Llama_Stack_Server.ipynb). This notebook will show you how to leverage together.ai's Llama Stack Server API, allowing you to get started with Llama Stack without the need for a locally built and running server.
@ -26,15 +26,15 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
- Follow instructions based on the OS you are on. For example, if you are on a Mac, download and unzip `Ollama-darwin.zip`.
- Run the `Ollama` application.
1. **Download the Ollama CLI**:
2. **Download the Ollama CLI**:
Ensure you have the `ollama` command line tool by downloading and installing it from the same website.
1. **Start ollama server**:
3. **Start ollama server**:
Open the terminal and run:
```
```bash
ollama serve
```
1. **Run the model**:
4. **Run the model**:
Open the terminal and run:
```bash
ollama run llama3.2:3b-instruct-fp16 --keepalive -1m
@ -48,9 +48,9 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
## Install Dependencies and Set Up Environment
1. **Create a Conda Environment**:
Create a new Conda environment with Python 3.10:
Create a new Conda environment with Python 3.12:
```bash
conda create -n ollama python=3.10
conda create -n ollama python=3.12
```
Activate the environment:
```bash

View file

@ -101,7 +101,7 @@ class MetricInResponse(BaseModel):
# This is a short term solution to allow inference API to return metrics
# The ideal way to do this is to have a way for all response types to include metrics
# and all metric events logged to the telemetry API to be inlcuded with the response
# and all metric events logged to the telemetry API to be included with the response
# To do this, we will need to augment all response types with a metrics field.
# We have hit a blocker from stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field

View file

@ -106,4 +106,26 @@ def is_action_allowed(
class AccessDeniedError(RuntimeError):
pass
def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None):
self.action = action
self.resource = resource
self.user = user
message = _build_access_denied_message(action, resource, user)
super().__init__(message)
def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str:
"""Build detailed error message for access denied scenarios."""
if action and resource and user:
resource_info = f"{resource.type}::{resource.identifier}"
user_info = f"'{user.principal}'"
if user.attributes:
attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()])
user_info += f" (attributes: {attrs})"
message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
else:
message = "Insufficient permissions"
return message
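A stand-alone sketch mirroring the message format built above, with hypothetical dataclasses standing in for the real `ProtectedResource` and `User` types:
```python
from dataclasses import dataclass, field


@dataclass
class Resource:
    type: str
    identifier: str


@dataclass
class RequestUser:
    principal: str
    attributes: dict = field(default_factory=dict)


def build_message(action, resource, user) -> str:
    # Same shape as _build_access_denied_message above.
    if action and resource and user:
        resource_info = f"{resource.type}::{resource.identifier}"
        user_info = f"'{user.principal}'"
        if user.attributes:
            attrs = ", ".join(f"{k}={v}" for k, v in user.attributes.items())
            user_info += f" (attributes: {attrs})"
        return f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
    return "Insufficient permissions"


print(build_message("delete", Resource("vector_db", "my_db"), RequestUser("alice", {"team": "ml"})))
# User 'alice' (attributes: team=ml) cannot perform action 'delete' on resource 'vector_db::my_db'
```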

View file

@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import (
builtin_automatically_routed_apis,
get_provider_registry,
)
from llama_stack.distribution.stack import replace_env_vars
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
@ -163,7 +164,7 @@ def upgrade_from_routing_table(
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
version = config_dict.get("version", None)
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
return StackRunConfig(**config_dict)
return StackRunConfig(**replace_env_vars(config_dict))
if "routing_table" in config_dict:
logger.info("Upgrading config...")
@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
if not config_dict.get("external_providers_dir", None):
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
return StackRunConfig(**config_dict)
return StackRunConfig(**replace_env_vars(config_dict))

View file

@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable):
return obj
async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
raise AccessDeniedError()
user = get_authenticated_user()
if not is_action_allowed(self.policy, "delete", obj, user):
raise AccessDeniedError("delete", obj, user)
await self.dist_registry.delete(obj.type, obj.identifier)
await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable):
# If object supports access control but no attributes set, use creator's attributes
creator = get_authenticated_user()
if not is_action_allowed(self.policy, "create", obj, creator):
raise AccessDeniedError()
raise AccessDeniedError("create", obj, creator)
if creator:
obj.owner = creator
logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

View file

@ -9,6 +9,7 @@ import asyncio
import functools
import inspect
import json
import logging
import os
import ssl
import sys
@ -31,6 +32,7 @@ from openai import BadRequestError
from pydantic import BaseModel, ValidationError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
elif isinstance(exc, BadRequestError):
return HTTPException(status_code=400, detail=str(exc))
elif isinstance(exc, PermissionError):
elif isinstance(exc, PermissionError | AccessDeniedError):
return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
result.url = route
return result
except Exception as e:
if logger.isEnabledFor(logging.DEBUG):
logger.exception(f"Error executing endpoint {route=} {method=}")
else:
logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
raise translate_exception(e) from e
sig = inspect.signature(func)

View file

@ -10,11 +10,11 @@ from typing import Protocol
import pydantic
from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider
from llama_stack.distribution.datatypes import RoutableObjectWithProvider
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core")

View file

@ -53,7 +53,7 @@ class AgentPersistence:
identifier=name, # should this be qualified in any way?
)
if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError()
raise AccessDeniedError("create", session_info, user)
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",

View file

@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
"pillow",
"pandas",
"scikit-learn",
"mcp",
]
+ kvstore_dependencies(), # TODO make this dynamic based on the kvstore config
module="llama_stack.providers.inline.agents.meta_reference",

View file

@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig):
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": "redis",
"namespace": None,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig):
description="File path for the sqlite database",
)
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": "sqlite",
"namespace": None,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig):
class PostgresKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
host: str = "localhost"
port: str = "5432"
port: int = 5432
db: str = "llamastack"
user: str
password: str | None = None
@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig):
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": "postgres",
"namespace": None,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig):
raise ValueError("Table name must be less than 63 characters")
return v
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig):
password: str | None = None
collection_name: str = "llamastack_kvstore"
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": "mongodb",
"namespace": None,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
@ -144,3 +140,21 @@ KVStoreConfig = Annotated[
RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig,
Field(discriminator="type", default=KVStoreType.sqlite.value),
]
def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]:
"""Get pip packages for KV store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteKVStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresKVStoreConfig.pip_packages()
elif store_type == "redis":
return RedisKVStoreConfig.pip_packages()
elif store_type == "mongodb":
return MongoDBKVStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown KV store type: {store_type}")
else:
return store_config.pip_packages()
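A brief usage sketch, assuming the module path shown in this diff; the dict literals are illustrative:
```python
# The helper accepts either a plain dict (e.g. parsed from a run.yaml) or a typed config object.
from llama_stack.providers.utils.kvstore.config import get_pip_packages

print(get_pip_packages({"type": "sqlite", "db_path": "/tmp/kvstore.db"}))  # ['aiosqlite']
print(get_pip_packages({"type": "postgres"}))                              # ['psycopg2-binary']
print(get_pip_packages({"type": "mongodb"}))                               # ['pymongo']
```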

View file

@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel):
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return cls(
type="sqlite",
db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
)
return {
"type": "sqlite",
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@property
def pip_packages(self) -> list[str]:
return super().pip_packages + ["aiosqlite"]
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal["postgres"] = SqlStoreType.postgres.value
host: str = "localhost"
port: str = "5432"
port: int = 5432
db: str = "llamastack"
user: str
password: str | None = None
@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@property
def pip_packages(self) -> list[str]:
return super().pip_packages + ["asyncpg"]
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return cls(
type="postgres",
host="${env.POSTGRES_HOST:=localhost}",
port="${env.POSTGRES_PORT:=5432}",
db="${env.POSTGRES_DB:=llamastack}",
user="${env.POSTGRES_USER:=llamastack}",
password="${env.POSTGRES_PASSWORD:=llamastack}",
)
return {
"type": "postgres",
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
SqlStoreConfig = Annotated[
@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[
]
def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]:
"""Get pip packages for SQL store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteSqlStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresSqlStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown SQL store type: {store_type}")
else:
return store_config.pip_packages()
def sqlstore_impl(config: SqlStoreConfig) -> SqlStore:
if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]:
from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl

View file

@ -9,6 +9,11 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text."""
"""
Generate a unique chunk ID using a hash of the document ID and chunk text.
Note: MD5 is used only to calculate an identifier, not for security purposes.
Adding usedforsecurity=False for compatibility with FIPS environments.
"""
hash_input = f"{document_id}:{chunk_text}".encode()
return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
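A stand-alone sketch of the same scheme, demonstrating that the ID is deterministic for a given (document ID, chunk text) pair; this mirrors the function above rather than importing it from the repo:
```python
import hashlib
import uuid


def chunk_id(document_id: str, chunk_text: str) -> str:
    # MD5 is used purely as a stable identifier hash, not for security.
    hash_input = f"{document_id}:{chunk_text}".encode()
    return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))


# The same inputs always map to the same UUID-formatted ID.
assert chunk_id("document_1", "some text") == chunk_id("document_1", "some text")
print(chunk_id("document_1", "some text"))
```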

View file

@ -21,7 +21,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
safety:
- provider_id: bedrock
@ -33,7 +32,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
responses_store:
type: sqlite
@ -51,7 +49,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -59,14 +56,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db
agents:
- provider_id: meta-reference
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db
responses_store:
type: sqlite
@ -50,7 +48,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -58,14 +55,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -36,7 +36,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
responses_store:
type: sqlite
@ -54,7 +53,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -62,14 +60,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -39,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -35,7 +35,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
@ -53,7 +52,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -61,14 +59,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
@ -45,7 +44,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store:
type: sqlite
@ -63,7 +61,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -71,14 +68,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -48,7 +48,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db
responses_store:
type: sqlite
@ -66,7 +65,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -74,14 +72,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -41,7 +41,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -54,7 +53,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
@ -72,7 +70,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -80,14 +77,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety:
- provider_id: nvidia
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite
@ -75,7 +73,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
- provider_id: nvidia
provider_type: remote::nvidia

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety:
- provider_id: nvidia
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
@ -56,7 +54,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -62,7 +62,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
responses_store:
type: sqlite
@ -80,7 +79,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -88,14 +86,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:=}",
service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
),

View file

@ -51,7 +51,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:=}
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
tool_runtime:

View file

@ -35,7 +35,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
@ -48,7 +47,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store:
type: sqlite
@ -59,7 +57,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -67,14 +64,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -28,7 +28,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
@ -41,7 +40,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store:
type: sqlite
@ -52,7 +50,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -60,14 +57,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -23,7 +23,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
@ -49,7 +48,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db
responses_store:
type: sqlite

View file

@ -66,7 +66,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
provider_type: inline::sqlite-vec
@ -78,7 +77,6 @@ providers:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
@ -111,7 +109,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
responses_store:
type: sqlite
@ -129,7 +126,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -137,14 +133,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate:
default_models = get_model_registry(available_models)
postgres_store = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate:
template_path=None,
providers=providers,
available_models_by_provider=available_models,
additional_pip_packages=postgres_store.pip_packages,
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.models import ModelType
from llama_stack.distribution.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
Api,
BenchmarkInput,
BuildConfig,
@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
StackRunConfig,
ToolGroupInput,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
def get_model_registry(
@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel):
default_tool_groups: list[ToolGroupInput] | None = None
default_datasets: list[DatasetInput] | None = None
default_benchmarks: list[BenchmarkInput] | None = None
metadata_store: KVStoreConfig | None = None
inference_store: SqlStoreConfig | None = None
metadata_store: dict | None = None
inference_store: dict | None = None
def run_config(
self,
name: str,
providers: dict[str, list[str]],
container_image: str | None = None,
) -> StackRunConfig:
) -> dict:
provider_registry = get_provider_registry()
provider_configs = {}
for api_str, provider_types in providers.items():
if api_providers := self.provider_overrides.get(api_str):
provider_configs[api_str] = api_providers
# Convert Provider objects to dicts for YAML serialization
provider_configs[api_str] = [
p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
]
continue
provider_configs[api_str] = []
@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel):
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
).model_dump(exclude_none=True)
)
# Get unique set of APIs from providers
apis = sorted(providers.keys())
return StackRunConfig(
image_name=name,
container_image=container_image,
apis=apis,
providers=provider_configs,
metadata_store=self.metadata_store
# Return a dict that matches StackRunConfig structure
return {
"version": LLAMA_STACK_RUN_CONFIG_VERSION,
"image_name": name,
"container_image": container_image,
"apis": apis,
"providers": provider_configs,
"metadata_store": self.metadata_store
or SqliteKVStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="registry.db",
),
inference_store=self.inference_store
"inference_store": self.inference_store
or SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="inference_store.db",
),
models=self.default_models or [],
shields=self.default_shields or [],
tool_groups=self.default_tool_groups or [],
datasets=self.default_datasets or [],
benchmarks=self.default_benchmarks or [],
)
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
"vector_dbs": [],
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
"scoring_fns": [],
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
"server": {
"port": 8321,
},
}
class DistributionTemplate(BaseModel):
@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel):
# TODO: This is a hack to get the dependencies for internal APIs into build
# We should have a better way to do this by formalizing the concept of "internal" APIs
# and providers, with a way to specify dependencies for them.
if run_config_.inference_store:
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
if run_config_.metadata_store:
additional_pip_packages.extend(run_config_.metadata_store.pip_packages)
if run_config_.get("inference_store"):
additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
if run_config_.get("metadata_store"):
additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages)
@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel):
run_config = settings.run_config(self.name, self.providers, self.container_image)
with open(yaml_output_dir / yaml_pth, "w") as f:
yaml.safe_dump(
run_config.model_dump(exclude_none=True),
{k: v for k, v in run_config.items() if v is not None},
f,
sort_keys=False,
)

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store:
type: sqlite
@ -56,7 +54,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -43,7 +42,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db
responses_store:
type: sqlite
@ -61,7 +59,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -69,14 +66,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -9,7 +9,9 @@ pytest --help
```
Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
- **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
- **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
- a URL which points to a Llama Stack distribution server
- a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
- a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
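For the ad-hoc form, a minimal sketch (assuming the provider names listed above and the `--text-model` flag used in the examples below) could look like:
```bash
# Ad-hoc stack: bind each API surface to a specific provider, no run.yaml needed
pytest -s -v tests/integration/inference/ \
  --stack-config=inference=fireworks,safety=llama-guard,agents=meta-reference \
  --text-model=meta-llama/Llama-3.1-8B-Instruct
```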
@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
Each of these is a comma-separated list and can be used to generate multiple parameter combinations. Note that tests will be skipped
if no model is specified.
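As an illustration (a sketch assuming the `--text-model` option shown in the examples below), a comma-separated value produces one test parameterization per model:
```bash
# Each selected test runs once per model in the list
pytest -s -v tests/integration/inference/test_text_inference.py \
  --stack-config=fireworks \
  --text-model=meta-llama/Llama-3.1-8B-Instruct,meta-llama/Llama-3.2-3B-Instruct
```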
Experimental options (under development):
- `--record-responses`: record new API responses instead of using cached ones
## Examples
### Testing against a Server
Run all text inference tests by auto-starting a server with the `fireworks` config:
```bash
pytest -s -v tests/integration/inference/test_text_inference.py \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run tests with auto-server startup on a custom port:
```bash
pytest -s -v tests/integration/inference/ \
--stack-config=server:together:8322 \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run multiple test suites with an auto-started server (no manual server management needed):
```bash
# Auto-start server and run all integration tests
export FIREWORKS_API_KEY=<your_key>
pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
### Testing with Library Client
Run all text inference tests with the `together` distribution:
```bash

View file

@ -6,9 +6,13 @@
import inspect
import os
import socket
import subprocess
import tempfile
import time
import pytest
import requests
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
@ -17,6 +21,60 @@ from llama_stack import LlamaStackAsLibraryClient
from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
from llama_stack.env import get_env_or_fail
DEFAULT_PORT = 8321
def is_port_available(port: int, host: str = "localhost") -> bool:
"""Check if a port is available for binding."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind((host, port))
return True
except OSError:
return False
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config."""
cmd = ["llama", "stack", "run", config_name]
devnull = open(os.devnull, "w")
process = subprocess.Popen(
cmd,
stdout=devnull, # redirect stdout to devnull to prevent deadlock
stderr=devnull, # redirect stderr to devnull to prevent deadlock
text=True,
env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
)
return process
def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
"""Wait for the server to be ready by polling the health endpoint."""
health_url = f"{base_url}/v1/health"
start_time = time.time()
while time.time() - start_time < timeout:
if process and process.poll() is not None:
print(f"Server process terminated with return code: {process.returncode}")
return False
try:
response = requests.get(health_url, timeout=5)
if response.status_code == 200:
return True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
pass
# Print progress every 5 seconds
elapsed = time.time() - start_time
if int(elapsed) % 5 == 0 and elapsed > 0:
print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
time.sleep(0.5)
print(f"Server failed to respond within {timeout} seconds")
return False
@pytest.fixture(scope="session")
def provider_data():
@ -122,6 +180,41 @@ def llama_stack_client(request, provider_data):
if not config:
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
# Start server
server_process = start_llama_stack_server(config_name)
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=30, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
)
# check if this looks like a URL
if config.startswith("http") or "//" in config:
return LlamaStackClient(
@ -151,3 +244,31 @@ def llama_stack_client(request, provider_data):
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
"""Cleanup server process at the end of the test session."""
yield # Run tests
if hasattr(request.session, "_llama_stack_server_process"):
server_process = request.session._llama_stack_server_process
if server_process:
if server_process.poll() is None:
print("Terminating llama stack server process...")
else:
print(f"Server process already terminated with return code: {server_process.returncode}")
return
try:
server_process.terminate()
server_process.wait(timeout=10)
print("Server process terminated gracefully")
except subprocess.TimeoutExpired:
print("Server process did not terminate gracefully, killing it")
server_process.kill()
server_process.wait()
print("Server process killed")
except Exception as e:
print(f"Error during server cleanup: {e}")
else:
print("Server process not found - won't be able to cleanup")

View file

@ -4,14 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
import pytest_asyncio
from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def sqlite_kvstore(tmp_path):
db_path = tmp_path / "test_kv.db"
kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
yield kvstore
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def disk_dist_registry(sqlite_kvstore):
registry = DiskDistributionRegistry(sqlite_kvstore)
await registry.initialize()
yield registry
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def cached_disk_dist_registry(sqlite_kvstore):
registry = CachedDiskDistributionRegistry(sqlite_kvstore)
await registry.initialize()

View file

@ -9,6 +9,7 @@ from datetime import datetime
from unittest.mock import patch
import pytest
import pytest_asyncio
from llama_stack.apis.agents import Turn
from llama_stack.apis.inference import CompletionMessage, StopReason
@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup(sqlite_kvstore):
agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
yield agent_persistence

View file

@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!"
@pytest.fixture(scope="session")
@pytest_asyncio.fixture(scope="session")
async def sqlite_vec_adapter(sqlite_connection):
config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database
adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)

View file

@ -7,6 +7,7 @@
from unittest.mock import MagicMock, Mock, patch
import pytest
import pytest_asyncio
import yaml
from pydantic import TypeAdapter, ValidationError
@ -26,7 +27,7 @@ def _return_model(model):
return model
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup(cached_disk_dist_registry):
mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock()
@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
assert model.identifier == "auto-access-model"
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup_with_access_policy(cached_disk_dist_registry):
mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock()

View file

@ -0,0 +1,187 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import Mock
from fastapi import HTTPException
from openai import BadRequestError
from pydantic import ValidationError
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError
from llama_stack.distribution.server.server import translate_exception
class TestTranslateException:
"""Test cases for the translate_exception function."""
def test_translate_access_denied_error(self):
"""Test that AccessDeniedError is translated to 403 HTTP status."""
exc = AccessDeniedError()
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert result.detail == "Permission denied: Insufficient permissions"
def test_translate_access_denied_error_with_context(self):
"""Test that AccessDeniedError with context includes detailed information."""
from llama_stack.distribution.datatypes import User
# Create mock user and resource
user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
# Create a simple mock object that implements the ProtectedResource protocol
class MockResource:
def __init__(self, type: str, identifier: str, owner=None):
self.type = type
self.identifier = identifier
self.owner = owner
resource = MockResource("vector_db", "test-db")
exc = AccessDeniedError("create", resource, user)
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert "test-user" in result.detail
assert "vector_db::test-db" in result.detail
assert "create" in result.detail
assert "roles=['user']" in result.detail
assert "teams=['dev']" in result.detail
def test_translate_permission_error(self):
"""Test that PermissionError is translated to 403 HTTP status."""
exc = PermissionError("Permission denied")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert result.detail == "Permission denied: Permission denied"
def test_translate_value_error(self):
"""Test that ValueError is translated to 400 HTTP status."""
exc = ValueError("Invalid input")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert result.detail == "Invalid value: Invalid input"
def test_translate_bad_request_error(self):
"""Test that BadRequestError is translated to 400 HTTP status."""
# Create a mock response for BadRequestError
mock_response = Mock()
mock_response.status_code = 400
mock_response.headers = {}
exc = BadRequestError("Bad request", response=mock_response, body="Bad request")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert result.detail == "Bad request"
def test_translate_authentication_required_error(self):
"""Test that AuthenticationRequiredError is translated to 401 HTTP status."""
exc = AuthenticationRequiredError("Authentication required")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 401
assert result.detail == "Authentication required: Authentication required"
def test_translate_timeout_error(self):
"""Test that TimeoutError is translated to 504 HTTP status."""
exc = TimeoutError("Operation timed out")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 504
assert result.detail == "Operation timed out: Operation timed out"
def test_translate_asyncio_timeout_error(self):
"""Test that asyncio.TimeoutError is translated to 504 HTTP status."""
exc = TimeoutError()
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 504
assert result.detail == "Operation timed out: "
def test_translate_not_implemented_error(self):
"""Test that NotImplementedError is translated to 501 HTTP status."""
exc = NotImplementedError("Not implemented")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 501
assert result.detail == "Not implemented: Not implemented"
def test_translate_validation_error(self):
"""Test that ValidationError is translated to 400 HTTP status with proper format."""
# Create a mock validation error using proper Pydantic error format
exc = ValidationError.from_exception_data(
"TestModel",
[
{
"loc": ("field", "nested"),
"msg": "field required",
"type": "missing",
}
],
)
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert "errors" in result.detail
assert len(result.detail["errors"]) == 1
assert result.detail["errors"][0]["loc"] == ["field", "nested"]
assert result.detail["errors"][0]["msg"] == "Field required"
assert result.detail["errors"][0]["type"] == "missing"
def test_translate_generic_exception(self):
"""Test that generic exceptions are translated to 500 HTTP status."""
exc = Exception("Unexpected error")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 500
assert result.detail == "Internal server error: An unexpected error occurred."
def test_translate_runtime_error(self):
"""Test that RuntimeError is translated to 500 HTTP status."""
exc = RuntimeError("Runtime error")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 500
assert result.detail == "Internal server error: An unexpected error occurred."
def test_multiple_access_denied_scenarios(self):
"""Test various scenarios that should result in 403 status codes."""
# Test AccessDeniedError (uses enhanced message)
exc1 = AccessDeniedError()
result1 = translate_exception(exc1)
assert isinstance(result1, HTTPException)
assert result1.status_code == 403
assert result1.detail == "Permission denied: Insufficient permissions"
# Test PermissionError (uses generic message)
exc2 = PermissionError("No permission")
result2 = translate_exception(exc2)
assert isinstance(result2, HTTPException)
assert result2.status_code == 403
assert result2.detail == "Permission denied: No permission"
exc3 = PermissionError("Access denied")
result3 = translate_exception(exc3)
assert isinstance(result3, HTTPException)
assert result3.status_code == 403
assert result3.detail == "Permission denied: Access denied"