From fc735a414eb013f8e9342c6a1584728c9d02ea4a Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Tue, 1 Jul 2025 14:48:46 -0700
Subject: [PATCH 01/10] test: Add one-step integration testing with server auto-start (#2580)

## Summary
Add support for the `server:<config>` format in the `--stack-config` option to enable seamless one-step integration testing. This eliminates the need to manually start servers in separate terminals before running tests.

## Key Features
- **Auto-start server**: Automatically launches `llama stack run <config>` if the target port is available
- **Smart reuse**: Reuses an existing server if the port is already occupied
- **Health check polling**: Waits up to 2 minutes for server readiness via the `/v1/health` endpoint
- **Custom port support**: Use `server:<config>:<port>` for non-default ports
- **Clean output**: Server runs quietly in the background without cluttering test output
- **Backward compatibility**: All existing `--stack-config` formats continue to work

## Usage Examples
```bash
# Auto-start server with default port 8321
pytest tests/integration/inference/ --stack-config=server:fireworks

# Use custom port
pytest tests/integration/safety/ --stack-config=server:together:8322

# Run multiple test suites seamlessly
pytest tests/integration/inference/ tests/integration/agents/ --stack-config=server:starter
```

## Implementation Details
- Enhanced `llama_stack_client` fixture with server management
- Updated documentation with cleaner organization and comprehensive examples
- Added utility functions for port checking, server startup, and health verification

## Test Plan
- Verified server auto-start when port 8321 is available
- Verified server reuse when port 8321 is occupied
- Tested health check polling via `/v1/health` endpoint
- Confirmed custom port configuration works correctly
- Verified backward compatibility with existing config formats

## Before/After Comparison

**Before (2 steps):**
```bash
# Terminal 1: Start server manually
llama stack run fireworks --port 8321

# Terminal 2: Wait for startup, then run tests
pytest tests/integration/inference/ --stack-config=http://localhost:8321
```

**After (1 step):**
```bash
# Single command handles everything
pytest tests/integration/inference/ --stack-config=server:fireworks
```
---
 tests/integration/README.md          | 39 ++++++++++++--
 tests/integration/fixtures/common.py | 76 ++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/tests/integration/README.md b/tests/integration/README.md
index 31d58c83f..fc8612139 100644
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -9,7 +9,9 @@ pytest --help
 ```
 Here are the most important options:
-- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
+- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
+  - **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
+  - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
   - a URL which points to a Llama Stack distribution server
   - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
   - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
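One gap worth noting in this patch: the fixture changes below stash the started server on `request.session._llama_stack_server_process` but never terminate it explicitly, relying on session teardown instead. A minimal sketch of an explicit session-scoped finalizer that would close that gap (the finalizer itself is an assumption for illustration, not part of the patch):

```python
import subprocess

import pytest


@pytest.fixture(scope="session", autouse=True)
def _terminate_auto_started_server(request):
    # Runs after the whole session; reuses the Popen handle that the
    # llama_stack_client fixture stores on the session object.
    yield
    process = getattr(request.session, "_llama_stack_server_process", None)
    if process is not None:
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
```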
@@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
 Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped if no model is specified.
 
-Experimental, under development, options:
-- `--record-responses`: record new API responses instead of using cached ones
-
-
 ## Examples
 
+### Testing against a Server
+
+Run all text inference tests by auto-starting a server with the `fireworks` config:
+
+```bash
+pytest -s -v tests/integration/inference/test_text_inference.py \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run tests with auto-server startup on a custom port:
+
+```bash
+pytest -s -v tests/integration/inference/ \
+   --stack-config=server:together:8322 \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run multiple test suites with auto-server (eliminates manual server management):
+
+```bash
+# Auto-start server and run all integration tests
+export FIREWORKS_API_KEY=
+
+pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+### Testing with Library Client
+
 Run all text inference tests with the `together` distribution:
 
 ```bash
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 8b6b3ddbe..2d6092e44 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -6,9 +6,13 @@
 
 import inspect
 import os
+import socket
+import subprocess
 import tempfile
+import time
 
 import pytest
+import requests
 import yaml
 from llama_stack_client import LlamaStackClient
 from openai import OpenAI
@@ -17,6 +21,44 @@ from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail
 
+DEFAULT_PORT = 8321
+
+
+def is_port_available(port: int, host: str = "localhost") -> bool:
+    """Check if a port is available for binding."""
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.bind((host, port))
+            return True
+    except OSError:
+        return False
+
+
+def start_llama_stack_server(config_name: str) -> subprocess.Popen:
+    """Start a llama stack server with the given config."""
+    cmd = ["llama", "stack", "run", config_name]
+
+    # Start server in background
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    return process
+
+
+def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
+    """Wait for the server to be ready by polling the health endpoint."""
+    health_url = f"{base_url}/v1/health"
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        try:
+            response = requests.get(health_url, timeout=5)
+            if response.status_code == 200:
+                return True
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            pass
+        time.sleep(0.5)
+
+    return False
+
 
 @pytest.fixture(scope="session")
 def provider_data():
@@ -122,6 +164,40 @@ def llama_stack_client(request, provider_data):
     if not config:
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
 
+    # Handle server:<config_name> format or server:<config_name>:<port>
+    if config.startswith("server:"):
+        parts = config.split(":")
+        config_name = parts[1]
+        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        base_url = f"http://localhost:{port}"
+
+        # Check if 
port is available + if is_port_available(port): + print(f"Starting llama stack server with config '{config_name}' on port {port}...") + + # Start server + server_process = start_llama_stack_server(config_name) + + # Wait for server to be ready + if not wait_for_server_ready(base_url, timeout=120): + print("Server failed to start within timeout") + server_process.terminate() + raise RuntimeError( + f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid." + ) + + print(f"Server is ready at {base_url}") + + # Store process for potential cleanup (pytest will handle termination at session end) + request.session._llama_stack_server_process = server_process + else: + print(f"Port {port} is already in use, assuming server is already running...") + + return LlamaStackClient( + base_url=base_url, + provider_data=provider_data, + ) + # check if this looks like a URL if config.startswith("http") or "//" in config: return LlamaStackClient( From 4d0d2d685f9ea9948a0a7ff6da376501291d45c7 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 2 Jul 2025 12:07:05 +0200 Subject: [PATCH 02/10] fix: Set parameter usedforsecurity=False when calling hashlib.md5 in order to fix rag_tool.insert on FIPS clusters (#2577) # What does this PR do? Set parameter `usedforsecurity=False` when calling hashlib.md5 in order to fix rag_tool.insert on FIPS clusters Closes #2571 --------- Signed-off-by: Jorge Garcia Oncins --- llama_stack/providers/utils/vector_io/chunk_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py index 68cf11cad..2a939bfba 100644 --- a/llama_stack/providers/utils/vector_io/chunk_utils.py +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -9,6 +9,11 @@ import uuid def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" + """ + Generate a unique chunk ID using a hash of the document ID and chunk text. + + Note: MD5 is used only to calculate an identifier, not for security purposes. + Adding usedforsecurity=False for compatibility with FIPS environments. + """ hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) + return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest())) From 5b077555565bca3a902a9a67b700e4e423f9981b Mon Sep 17 00:00:00 2001 From: Nate Harada Date: Wed, 2 Jul 2025 17:26:51 -0700 Subject: [PATCH 03/10] docs: Minor spelling fix (#2592) # What does this PR do? Minor spelling fix in the comments ## Test Plan No code changes --- llama_stack/apis/telemetry/telemetry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 0eb53f397..d621e601e 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -101,7 +101,7 @@ class MetricInResponse(BaseModel): # This is a short term solution to allow inference API to return metrics # The ideal way to do this is to have a way for all response types to include metrics -# and all metric events logged to the telemetry API to be inlcuded with the response +# and all metric events logged to the telemetry API to be included with the response # To do this, we will need to augment all response types with a metrics field. 
# We have hit a blocker from stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field

From 040424acf58094b4227170cef853d768d32c62df Mon Sep 17 00:00:00 2001
From: Wen Zhou
Date: Thu, 3 Jul 2025 10:12:56 +0200
Subject: [PATCH 04/10] docs: update full list of providers with matched APIs and dockerhub images (#2452)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
- add model_type in example
- change "Memory" to "VectorIO" as column name
- update index.md and README.md

## Test Plan
run pre-commit to catch changes.

---------

Signed-off-by: Wen Zhou
Co-authored-by: Sébastien Han
---
 README.md                                  | 71 +++++++++++++---------
 docs/source/distributions/configuration.md | 11 ++--
 docs/source/index.md                       | 35 ++++++++++-
 docs/source/providers/index.md             |  7 ++-
 4 files changed, 87 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 7f34c3340..3b5358ec2 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@ pip install llama-stack-client
 ### CLI
 ```bash
 # Run a chat completion
+MODEL="Llama-4-Scout-17B-16E-Instruct"
+
 llama-stack-client --endpoint http://localhost:8321 \
 inference chat-completion \
 --model-id meta-llama/$MODEL \
@@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
 ### API Providers
 Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
+Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html) of providers.
 
-| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** |
-|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:|
-| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | |
-| SambaNova | Hosted | | ✅ | | ✅ | | |
-| Cerebras | Hosted | | ✅ | | | | |
-| Fireworks | Hosted | ✅ | ✅ | ✅ | | | |
-| AWS Bedrock | Hosted | | ✅ | | ✅ | | |
-| Together | Hosted | ✅ | ✅ | | ✅ | | |
-| Groq | Hosted | | ✅ | | | | |
-| Ollama | Single Node | | ✅ | | | | |
-| TGI | Hosted and Single Node | | ✅ | | | | |
-| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | |
-| Chroma | Single Node | | | ✅ | | | |
-| PG Vector | Single Node | | | ✅ | | | |
-| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | |
-| vLLM | Hosted and Single Node | | ✅ | | | | |
-| OpenAI | Hosted | | ✅ | | | | |
-| Anthropic | Hosted | | ✅ | | | | |
-| Gemini | Hosted | | ✅ | | | | |
-| watsonx | Hosted | | ✅ | | | | |
-| HuggingFace | Single Node | | | | | | ✅ |
-| TorchTune | Single Node | | | | | | ✅ |
-| NVIDIA NEMO | Hosted | | | | | | ✅ |
+| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
+|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
+| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| SambaNova | Hosted | | ✅ | | ✅ | | | | |
+| Cerebras | Hosted | | ✅ | | | | | | |
+| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
+| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
+| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
+| Groq | Hosted | | ✅ | | | | | | |
+| Ollama | Single Node | | ✅ | | | | | | |
+| TGI | Hosted/Single Node | | ✅ | | | | | | |
+| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
+| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
+| PG Vector | Single Node | | | ✅ | | | | | |
+| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
+| vLLM | Single Node | | ✅ | | | | | | |
+| OpenAI | Hosted | | ✅ | | | | | | |
+| Anthropic | Hosted | | ✅ | | | | | | |
+| Gemini | Hosted | | ✅ | | | | | | |
+| WatsonX | Hosted | | ✅ | | | | | | |
+| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
+| TorchTune | Single Node | | | | | | ✅ | | |
+| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
+| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
+
+> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
 
 ### Distributions
 
-A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support:
+A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code.
+Here are some of the distributions we support:
 
 | **Distribution** | **Llama Stack Docker** | Start This Distribution |
 |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
 | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
-| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
-| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
+| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html)
+| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html)
+| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | |
+| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
+
+
+Here are the ones out of support scope but still available from Dockerhub:
+
+| 
**Distribution** | **Llama Stack Docker** | Start This Distribution | +|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | -| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | | Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) | | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | -| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | +| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) | +| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | +| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | | | ### Documentation diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 0a0ce994f..1bba6677e 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -77,10 +77,10 @@ Next up is the most critical part: the set of providers that the stack will use ```yaml providers: inference: - # provider_id is a string you can choose freely + # provider_id is a string you can choose freely - provider_id: ollama # provider_type is a string that specifies the type of provider. - # in this case, the provider for inference is ollama and it is run remotely (outside of the distribution) + # in this case, the provider for inference is ollama and it runs remotely (outside of the distribution) provider_type: remote::ollama # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server @@ -88,7 +88,7 @@ providers: url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: -- A _provider instance_ is identified with an (id, type, configuration) triplet. +- A _provider instance_ is identified with an (id, type, config) triplet. - The id is a string you can choose freely. 
- You can instantiate any number of provider instances of the same type.
 - The configuration dictionary is provider-specific.
@@ -187,7 +187,7 @@ The environment variable substitution system is type-safe:
 
 ## Resources
 
-Finally, let's look at the `models` section:
+Let's look at the `models` section:
 
 ```yaml
 models:
@@ -195,8 +195,9 @@ models:
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
   provider_model_id: null
+  model_type: llm
 ```
-A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models.
+A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
 
 What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
 
diff --git a/docs/source/index.md b/docs/source/index.md
index 1df5e8507..755b228e3 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store
 | OpenAI | Hosted |
 | Anthropic | Hosted |
 | Gemini | Hosted |
+| WatsonX | Hosted |
 
+**Agents API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| Fireworks | Hosted |
+| Together | Hosted |
+| PyTorch ExecuTorch | On-device iOS |
 
 **Vector IO API**
 | **Provider** | **Environments** |
 | :----: | :----: |
 | FAISS | Single Node |
-| SQLite-Vec| Single Node |
+| SQLite-Vec | Single Node |
 | Chroma | Hosted and Single Node |
 | Milvus | Hosted and Single Node |
 | Postgres (PGVector) | Hosted and Single Node |
 | Weaviate | Hosted |
+| Qdrant | Hosted and Single Node |
 
 **Safety API**
 | **Provider** | **Environments** |
@@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store
 | Code Scanner | Single Node |
 | AWS Bedrock | Hosted |
 
+**Post Training API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| HuggingFace | Single Node |
+| TorchTune | Single Node |
+| NVIDIA NEMO | Hosted |
+
+**Eval API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| NVIDIA NEMO | Hosted |
+
+**Telemetry API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+
+**Tool Runtime API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Brave Search | Hosted |
+| RAG Runtime | Single Node |
 
 ```{toctree}
 :hidden:
 
diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md
index 3ea253685..f804582d7 100644
--- a/docs/source/providers/index.md
+++ b/docs/source/providers/index.md
@@ -1,9 +1,10 @@
 # Providers 
Overview The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: -- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), -- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.), -- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.) +- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.), +- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.), +- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.), +- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.) Providers come in two flavors: - **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code. From 577ec382e1ae49ef66d23b6dd6be6104915753ec Mon Sep 17 00:00:00 2001 From: Sumanth Kamenani Date: Thu, 3 Jul 2025 05:14:51 -0400 Subject: [PATCH 05/10] fix(docs): update Agents101 notebook for builtin websearch (#2591) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch from BRAVE_SEARCH_API_KEY to TAVILY_SEARCH_API_KEY - Add provider_data to LlamaStackClient for API key passing - Use builtin::websearch toolgroup instead of manual tool config - Fix message types to use UserMessage instead of plain dict - Add streaming support with proper type casting - Remove async from EventLogger loop (bug fix) Fixes websearch functionality in agents tutorial by properly configuring Tavily search provider integration. # What does this PR do? Fixes the Agents101 tutorial notebook to work with the current Llama Stack websearch implementation. The tutorial was using outdated Brave Search configuration that no longer works with the current server setup. **Key Changes:** - **Switch API provider**: Change from `BRAVE_SEARCH_API_KEY` to `TAVILY_SEARCH_API_KEY` to match server configuration - **Fix client setup**: Add `provider_data` to `LlamaStackClient` to properly pass API keys to server - **Modernize tool usage**: Replace manual tool configuration with `tools=["builtin::websearch"]` - **Fix type safety**: Use `UserMessage` type instead of plain dictionaries for messages - **Fix streaming**: Add proper streaming support with `stream=True` and type casting - **Fix EventLogger**: Remove incorrect `async for` usage (should be `for`) **Why needed:** Users following the tutorial were getting 401 Unauthorized errors because the notebook wasn't properly configured for the Tavily search provider that the server actually uses. ## Test Plan **Prerequisites:** 1. Start Llama Stack server with Ollama template and `TAVILY_SEARCH_API_KEY` environment variable 2. Set `TAVILY_SEARCH_API_KEY` in your `.env` file **Testing Steps:** 1. **Clone and setup:** ```bash git checkout fix-2558-update-agents101 cd docs/zero_to_hero_guide/ ``` 2. **Start server with API key:** ```bash export TAVILY_SEARCH_API_KEY="your_tavily_api_key" podman run -it --network=host -v ~/.llama:/root/.llama:Z \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env OLLAMA_URL=http://localhost:11434 \ --env TAVILY_SEARCH_API_KEY=$TAVILY_SEARCH_API_KEY \ llamastack/distribution-ollama --port $LLAMA_STACK_PORT ``` 3. 
**Run the notebook:** - Open `07_Agents101.ipynb` in Jupyter - Execute all cells in order - Cell 5 should run without errors and show successful web search results **Expected Results:** - ✅ No 401 Unauthorized errors - ✅ Agent successfully calls `brave_search.call()` with web results - ✅ Switzerland travel recommendations appear in output - ✅ Follow-up questions work correctly **Before this fix:** Users got `401 Unauthorized` errors and tutorial failed **After this fix:** Tutorial works end-to-end with proper web search functionality **Tested with:** - Tavily API key (free tier) - Ollama distribution template - Llama-3.2-3B-Instruct model --- docs/zero_to_hero_guide/07_Agents101.ipynb | 29 +++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/zero_to_hero_guide/07_Agents101.ipynb b/docs/zero_to_hero_guide/07_Agents101.ipynb index b6df2a4c8..905799946 100644 --- a/docs/zero_to_hero_guide/07_Agents101.ipynb +++ b/docs/zero_to_hero_guide/07_Agents101.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", - "BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n" + "TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n" ] }, { @@ -110,10 +110,17 @@ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from typing import cast, Iterator\n", "\n", "\n", "async def agent_example():\n", - " client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://{HOST}:{PORT}\",\n", + " provider_data={\n", + " \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n", + " }\n", + " )\n", " agent = Agent(\n", " client,\n", " model=MODEL_NAME,\n", @@ -123,13 +130,7 @@ " \"type\": \"greedy\",\n", " },\n", " },\n", - " tools=[\n", - " {\n", - " \"type\": \"brave_search\",\n", - " \"engine\": \"brave\",\n", - " \"api_key\": BRAVE_SEARCH_API_KEY,\n", - " }\n", - " ],\n", + " tools=[\"builtin::websearch\"],\n", " )\n", " session_id = agent.create_session(\"test-session\")\n", " print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n", @@ -142,15 +143,13 @@ " for prompt in user_prompts:\n", " response = agent.create_turn(\n", " messages=[\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": prompt,\n", - " }\n", + " UserMessage(role=\"user\", content=prompt)\n", " ],\n", " session_id=session_id,\n", + " stream=True,\n", " )\n", "\n", - " async for log in EventLogger().log(response):\n", + " for log in EventLogger().log(cast(Iterator, response)):\n", " log.print()\n", "\n", "\n", From b246b0660ee7b346d232606c29a6cb30a142ce5c Mon Sep 17 00:00:00 2001 From: Christian Zaccaria <73656840+ChristianZaccaria@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:55:43 +0100 Subject: [PATCH 06/10] docs: Add quick_start.ipynb notebook equivalent of index.md Quickstart guide (#2128) # What does this PR do? - Adding a notebook equivalent of the [getting_started/index.md#Quickstart guide](https://github.com/meta-llama/llama-stack/blob/main/docs/source/getting_started/index.md). ## To discuss **Note:** works locally, but I am encountering issues when attempting to run through the notebook on Google Colab. 
Specifically, on the last step to run the demo, the `knowledge_search` tool doesn't seem to be called i.e.,: ``` rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html prompt> How do you do great work? inference> I don't have personal experiences or emotions, but I was trained on a large corpus of text data and use various techniques such as natural language processing (NLP) and machine learning algorithms to generate human-like responses. ``` I would expect to get something like: ``` rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html prompt> How do you do great work? inference> [knowledge_search(query="What is the key to doing great work")] tool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'} tool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks: .... .... ``` --- docs/getting_started.ipynb | 4 +- docs/getting_started_llama4.ipynb | 4 +- docs/getting_started_llama_api.ipynb | 4 +- docs/quick_start.ipynb | 367 +++++++++++++++++++++++++++ docs/source/getting_started/index.md | 2 + 5 files changed, 378 insertions(+), 3 deletions(-) create mode 100644 docs/quick_start.ipynb diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index cdaf074b8..88878c9be 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n" + "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index d489b5d06..edefda28c 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb index 128e9114a..e6c74986b 100644 --- a/docs/getting_started_llama_api.ipynb +++ b/docs/getting_started_llama_api.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. 
It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb new file mode 100644 index 000000000..4ae1dbe8d --- /dev/null +++ b/docs/quick_start.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c1e7571c", + "metadata": { + "id": "c1e7571c" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n", + "\n", + "# Llama Stack - Building AI Applications\n", + "\n", + "\"drawing\"\n", + "\n", + "Get started with Llama Stack in minutes!\n", + "\n", + "[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n", + "\n", + "In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n", + "as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4CV1Q19BDMVw", + "metadata": { + "id": "4CV1Q19BDMVw" + }, + "source": [ + "## Step 1: Install and setup" + ] + }, + { + "cell_type": "markdown", + "id": "K4AvfUAJZOeS", + "metadata": { + "id": "K4AvfUAJZOeS" + }, + "source": [ + "### 1.1. Install uv and test inference with Ollama\n", + "\n", + "We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a2d7b85", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install uv llama_stack llama-stack-client\n", + "\n", + "## If running on Collab:\n", + "# !pip install colab-xterm\n", + "# %load_ext colabxterm\n", + "\n", + "!curl https://ollama.ai/install.sh | sh" + ] + }, + { + "cell_type": "markdown", + "id": "39fa584b", + "metadata": {}, + "source": [ + "### 1.2. Test inference with Ollama" + ] + }, + { + "cell_type": "markdown", + "id": "3bf81522", + "metadata": {}, + "source": [ + "We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7e8e0f1", + "metadata": {}, + "outputs": [], + "source": [ + "## If running on Colab:\n", + "# %xterm\n", + "\n", + "## To be ran in the terminal:\n", + "# ollama serve &\n", + "# ollama run llama3.2:3b --keepalive 60m" + ] + }, + { + "cell_type": "markdown", + "id": "f3c5f243", + "metadata": {}, + "source": [ + "If successful, you should see the model respond to a prompt.\n", + "\n", + "...\n", + "```\n", + ">>> hi\n", + "Hello! How can I assist you today?\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "oDUB7M_qe-Gs", + "metadata": { + "id": "oDUB7M_qe-Gs" + }, + "source": [ + "## Step 2: Run the Llama Stack server\n", + "\n", + "In this showcase, we will start a Llama Stack server that is running locally." + ] + }, + { + "cell_type": "markdown", + "id": "732eadc6", + "metadata": {}, + "source": [ + "### 2.1. 
Setup the Llama Stack Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "J2kGed0R5PSf", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "J2kGed0R5PSf", + "outputId": "2478ea60-8d35-48a1-b011-f233831740c5" + }, + "outputs": [], + "source": [ + "import os \n", + "import subprocess\n", + "\n", + "if \"UV_SYSTEM_PYTHON\" in os.environ:\n", + " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", + "\n", + "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n", + "!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n", + "\n", + "def run_llama_stack_server_background():\n", + " log_file = open(\"llama_stack_server.log\", \"w\")\n", + " process = subprocess.Popen(\n", + " f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n", + " shell=True,\n", + " stdout=log_file,\n", + " stderr=log_file,\n", + " text=True\n", + " )\n", + " \n", + " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n", + " return process\n", + "\n", + "def wait_for_server_to_start():\n", + " import requests\n", + " from requests.exceptions import ConnectionError\n", + " import time\n", + " \n", + " url = \"http://0.0.0.0:8321/v1/health\"\n", + " max_retries = 30\n", + " retry_interval = 1\n", + " \n", + " print(\"Waiting for server to start\", end=\"\")\n", + " for _ in range(max_retries):\n", + " try:\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " print(\"\\nServer is ready!\")\n", + " return True\n", + " except ConnectionError:\n", + " print(\".\", end=\"\", flush=True)\n", + " time.sleep(retry_interval)\n", + " \n", + " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n", + " return False\n", + "\n", + "\n", + "# use this helper if needed to kill the server \n", + "def kill_llama_stack_server():\n", + " # Kill any existing llama stack server processes\n", + " os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c40e9efd", + "metadata": {}, + "source": [ + "### 2.2. 
Start the Llama Stack Server" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f779283d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Llama Stack server with PID: 787100\n", + "Waiting for server to start\n", + "Server is ready!\n" + ] + } + ], + "source": [ + "server_process = run_llama_stack_server_background()\n", + "assert wait_for_server_to_start()" + ] + }, + { + "cell_type": "markdown", + "id": "28477c03", + "metadata": {}, + "source": [ + "## Step 3: Run the demo" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7da71011", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n", + "prompt> How do you do great work?\n", + "\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". 
Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + } + ], + "source": [ + "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n", + "\n", + "vector_db_id = \"my_demo_vector_db\"\n", + "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n", + "\n", + "models = client.models.list()\n", + "\n", + "# Select the first LLM and first embedding models\n", + "model_id = next(m for m in models if m.model_type == \"llm\").identifier\n", + "embedding_model_id = (\n", + " em := next(m for m in models if m.model_type == \"embedding\")\n", + ").identifier\n", + "embedding_dimension = em.metadata[\"embedding_dimension\"]\n", + "\n", + "_ = client.vector_dbs.register(\n", + " vector_db_id=vector_db_id,\n", + " embedding_model=embedding_model_id,\n", + " embedding_dimension=embedding_dimension,\n", + " provider_id=\"faiss\",\n", + ")\n", + "source = \"https://www.paulgraham.com/greatwork.html\"\n", + "print(\"rag_tool> Ingesting document:\", source)\n", + "document = RAGDocument(\n", + " document_id=\"document_1\",\n", + " content=source,\n", + " mime_type=\"text/html\",\n", + " metadata={},\n", + ")\n", + "client.tool_runtime.rag_tool.insert(\n", + " documents=[document],\n", + " vector_db_id=vector_db_id,\n", + " chunk_size_in_tokens=50,\n", + ")\n", + "agent = Agent(\n", + " client,\n", + " model=model_id,\n", + " instructions=\"You are a helpful assistant\",\n", + " tools=[\n", + " {\n", + " \"name\": \"builtin::rag/knowledge_search\",\n", + " \"args\": {\"vector_db_ids\": [vector_db_id]},\n", + " }\n", + " ],\n", + ")\n", + "\n", + "prompt = \"How do you do great work?\"\n", + "print(\"prompt>\", prompt)\n", + "\n", + "response = agent.create_turn(\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " 
session_id=agent.create_session(\"rag_session\"),\n", + " stream=True,\n", + ")\n", + "\n", + "for log in AgentEventLogger().log(response):\n", + " log.print()" + ] + }, + { + "cell_type": "markdown", + "id": "341aaadf", + "metadata": {}, + "source": [ + "Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳" + ] + }, + { + "cell_type": "markdown", + "id": "e88e1185", + "metadata": {}, + "source": [ + "## Next Steps" + ] + }, + { + "cell_type": "markdown", + "id": "bcb73600", + "metadata": {}, + "source": [ + "Now you're ready to dive deeper into Llama Stack!\n", + "- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n", + "- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n", + "- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n", + "- Learn about Llama Stack [Concepts](../concepts/index.md).\n", + "- Discover how to [Build Llama Stacks](../distributions/index.md).\n", + "- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n", + "- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index 8382758cc..ea45da1f7 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) as the inference [provider](../providers/inference/index) for a Llama Model. +**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb) + #### Step 1: Install and setup 1. Install [uv](https://docs.astral.sh/uv/) 2. Run inference on a Llama model with [Ollama](https://ollama.com/download) From aa273944fd3570ed2fdb6f8803d57176343dd78b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 3 Jul 2025 14:59:01 +0200 Subject: [PATCH 07/10] fix: add mcp dependency to agent provider (#2587) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? The agent depends on utils.tools.mcp. 
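A quick way to sanity-check the fix (a sketch; that `ProviderSpec` exposes its dependency list as `pip_packages` and its identifier as `provider_type` is an assumption based on how the registry diff below is shaped):

```python
from llama_stack.providers.registry.agents import available_providers

# Confirm "mcp" is now listed among the agent providers' pip dependencies
for spec in available_providers():
    print(spec.provider_type, "mcp" in spec.pip_packages)
```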
Closes: https://github.com/meta-llama/llama-stack/issues/2576 Signed-off-by: Sébastien Han --- llama_stack/providers/registry/agents.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 834e81b96..6f8c05a67 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: "pillow", "pandas", "scikit-learn", + "mcp", ] + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config module="llama_stack.providers.inline.agents.meta_reference", From 3c43a2f529b9e7b0dcbd9c522a76ab0fe6a00657 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 3 Jul 2025 10:07:23 -0700 Subject: [PATCH 08/10] fix: store configs (#2593) # What does this PR do? https://github.com/meta-llama/llama-stack/pull/2490 broke postgres_demo, as the config expected a str but the value was converted to int. This PR: 1. Updates the type of port in sqlstore to be int 2. template generation uses `dict` instead of `StackRunConfig` so as to avoid failing pydantic typechecks. 3. Adds `replace_env_vars` to StackRunConfig instantiation in `configure.py` (not sure why this wasn't needed before). ## Test Plan `llama stack build --template postgres_demo --image-type conda --run` --- .../providers/agents/inline_meta-reference.md | 1 - .../providers/datasetio/inline_localfs.md | 1 - .../providers/datasetio/remote_huggingface.md | 1 - .../providers/eval/inline_meta-reference.md | 1 - .../providers/vector_io/inline_faiss.md | 1 - .../vector_io/inline_meta-reference.md | 1 - .../providers/vector_io/inline_milvus.md | 1 - llama_stack/distribution/configure.py | 5 +- llama_stack/distribution/store/registry.py | 4 +- llama_stack/providers/utils/kvstore/config.py | 40 +++++++---- .../providers/utils/sqlstore/sqlstore.py | 56 ++++++++++------ llama_stack/templates/bedrock/run.yaml | 5 -- llama_stack/templates/cerebras/run.yaml | 5 -- llama_stack/templates/ci-tests/run.yaml | 4 -- .../templates/dell/run-with-safety.yaml | 4 -- llama_stack/templates/dell/run.yaml | 4 -- .../templates/fireworks/run-with-safety.yaml | 5 -- llama_stack/templates/fireworks/run.yaml | 5 -- llama_stack/templates/groq/run.yaml | 5 -- .../hf-endpoint/run-with-safety.yaml | 5 -- llama_stack/templates/hf-endpoint/run.yaml | 5 -- .../hf-serverless/run-with-safety.yaml | 5 -- llama_stack/templates/hf-serverless/run.yaml | 5 -- llama_stack/templates/llama_api/run.yaml | 4 -- .../meta-reference-gpu/run-with-safety.yaml | 5 -- .../templates/meta-reference-gpu/run.yaml | 5 -- .../templates/nvidia/run-with-safety.yaml | 3 - llama_stack/templates/nvidia/run.yaml | 2 - .../templates/ollama/run-with-safety.yaml | 5 -- llama_stack/templates/ollama/run.yaml | 5 -- llama_stack/templates/open-benchmark/run.yaml | 4 -- .../passthrough/run-with-safety.yaml | 5 -- llama_stack/templates/passthrough/run.yaml | 5 -- .../templates/postgres-demo/postgres_demo.py | 2 +- llama_stack/templates/postgres-demo/run.yaml | 2 +- .../remote-vllm/run-with-safety.yaml | 5 -- llama_stack/templates/remote-vllm/run.yaml | 5 -- llama_stack/templates/sambanova/run.yaml | 2 - llama_stack/templates/starter/run.yaml | 6 -- llama_stack/templates/starter/starter.py | 3 +- llama_stack/templates/template.py | 66 +++++++++++-------- .../templates/tgi/run-with-safety.yaml | 5 -- llama_stack/templates/tgi/run.yaml | 5 -- .../templates/together/run-with-safety.yaml | 5 -- llama_stack/templates/together/run.yaml | 5 -- 
llama_stack/templates/vllm-gpu/run.yaml | 5 -- llama_stack/templates/watsonx/run.yaml | 5 -- 47 files changed, 110 insertions(+), 223 deletions(-) diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/source/providers/agents/inline_meta-reference.md index cfc0c6881..5f64f79e1 100644 --- a/docs/source/providers/agents/inline_meta-reference.md +++ b/docs/source/providers/agents/inline_meta-reference.md @@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve ```yaml persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db responses_store: type: sqlite diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/source/providers/datasetio/inline_localfs.md index fbe4c40e3..87a0c795c 100644 --- a/docs/source/providers/datasetio/inline_localfs.md +++ b/docs/source/providers/datasetio/inline_localfs.md @@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db ``` diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/source/providers/datasetio/remote_huggingface.md index e2052602e..3711f7396 100644 --- a/docs/source/providers/datasetio/remote_huggingface.md +++ b/docs/source/providers/datasetio/remote_huggingface.md @@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db ``` diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/source/providers/eval/inline_meta-reference.md index 704741b5a..606883c72 100644 --- a/docs/source/providers/eval/inline_meta-reference.md +++ b/docs/source/providers/eval/inline_meta-reference.md @@ -15,7 +15,6 @@ Meta's reference implementation of evaluation tasks with support for multiple la ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db ``` diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index 2dcf4625b..bcff66f3f 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -44,7 +44,6 @@ more details about Faiss in general. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md index c9ca12ff2..0aac445bd 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -15,7 +15,6 @@ Meta's reference implementation of a vector database. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md index 8e99d7f95..65c67f3ee 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -19,7 +19,6 @@ Please refer to the remote provider documentation. 
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db ``` diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index e58ea0338..35b216b30 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) +from llama_stack.distribution.stack import replace_env_vars from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.prompt_for_config import prompt_for_config @@ -163,7 +164,7 @@ def upgrade_from_routing_table( def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: version = config_dict.get("version", None) if version == LLAMA_STACK_RUN_CONFIG_VERSION: - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) if "routing_table" in config_dict: logger.info("Upgrading config...") @@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi if not config_dict.get("external_providers_dir", None): config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py index 0e84854c2..cd7cd9f00 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/distribution/store/registry.py @@ -10,11 +10,11 @@ from typing import Protocol import pydantic -from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider +from llama_stack.distribution.datatypes import RoutableObjectWithProvider from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core") diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index a45ff4ce8..0219bbebe 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig): def url(self) -> str: return f"redis://{self.host}:{self.port}" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["redis"] @classmethod def sample_run_config(cls): return { "type": "redis", - "namespace": None, "host": "${env.REDIS_HOST:=localhost}", "port": "${env.REDIS_PORT:=6379}", } @@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig): description="File path for the sqlite database", ) - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["aiosqlite"] @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): return { "type": "sqlite", - "namespace": None, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig): class 
PostgresKVStoreConfig(CommonConfig): type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig): def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): return { "type": "postgres", - "namespace": None, "host": "${env.POSTGRES_HOST:=localhost}", "port": "${env.POSTGRES_PORT:=5432}", "db": "${env.POSTGRES_DB:=llamastack}", @@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig): raise ValueError("Table name must be less than 63 characters") return v - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["psycopg2-binary"] @@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig): password: str | None = None collection_name: str = "llamastack_kvstore" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["pymongo"] @classmethod def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): return { "type": "mongodb", - "namespace": None, "host": "${env.MONGODB_HOST:=localhost}", "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", @@ -144,3 +140,21 @@ KVStoreConfig = Annotated[ RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type", default=KVStoreType.sqlite.value), ] + + +def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: + """Get pip packages for KV store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteKVStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresKVStoreConfig.pip_packages() + elif store_type == "redis": + return RedisKVStoreConfig.pip_packages() + elif store_type == "mongodb": + return MongoDBKVStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown KV store type: {store_type}") + else: + return store_config.pip_packages() diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index d558a2a26..227c5abcd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel): def engine_str(self) -> str: ... 
# TODO: move this when we have a better way to specify dependencies with internal APIs - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["sqlalchemy[asyncio]"] @@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return cls( - type="sqlite", - db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - ) + return { + "type": "sqlite", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["aiosqlite"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["aiosqlite"] class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): type: Literal["postgres"] = SqlStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def engine_str(self) -> str: return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["asyncpg"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["asyncpg"] @classmethod def sample_run_config(cls, **kwargs): - return cls( - type="postgres", - host="${env.POSTGRES_HOST:=localhost}", - port="${env.POSTGRES_PORT:=5432}", - db="${env.POSTGRES_DB:=llamastack}", - user="${env.POSTGRES_USER:=llamastack}", - password="${env.POSTGRES_PASSWORD:=llamastack}", - ) + return { + "type": "postgres", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + } SqlStoreConfig = Annotated[ @@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[ ] +def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: + """Get pip packages for SQL store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteSqlStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresSqlStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown SQL store type: {store_type}") + else: + return store_config.pip_packages() + + def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]: from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index f12c5bec5..068278c66 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -21,7 +21,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock @@ -33,7 +32,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite @@ -51,7 +49,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - 
provider_id: huggingface @@ -59,14 +56,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index c3877ddce..305e9a20f 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite @@ -50,7 +48,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -58,14 +55,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index a38d09324..5a68af3e6 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -36,7 +36,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite @@ -54,7 +53,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -62,14 +60,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 48639c772..1e1ef1ea9 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -39,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -57,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 13d43530b..6f5c56dd3 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -35,7 +35,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -53,7 +52,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -61,14 +59,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index ecb53a18d..1233e2271 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -45,7 +44,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -63,7 +61,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -71,14 +68,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 298d28d52..7f0bc49f5 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - 
provider_id: basic diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 13bb65ed2..351ca74f7 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index b2bc6a8e9..63063ad91 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index d62921ccc..4caf0db04 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: 
type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index d7ff4f446..a4bba1f76 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 19484cba6..23e4c1f28 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 3bfb284a3..77bbcfbc8 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -48,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite @@ -66,7 +65,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -74,14 +72,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: 
inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 46b3a33a6..2f5ee4062 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -41,7 +41,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -54,7 +53,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -72,7 +70,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -80,14 +77,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 033ec245a..cc119bf4d 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 73783be98..7dcfd196d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite @@ -75,7 +73,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index af9d5904a..f69270fb5 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index bad51de09..98db5fc98 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index e1dea730e..38fb2bace 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 57ae6b9be..7b1ef8f10 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -62,7 +62,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite @@ -80,7 +79,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -88,14 +86,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 7a30f665c..5cd8a2930 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index dc751ea20..5b6078953 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 67ad35db7..ed69c22db 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -114,7 +114,7 @@ def 
get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:=}", + service_name="${env.OTEL_SERVICE_NAME:=\u200b}", sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index dd20cc6ac..2b6b1a64f 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -51,7 +51,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:=} + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 78fb22d38..a8d30904d 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -35,7 +35,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -48,7 +47,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -59,7 +57,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -67,14 +64,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 1cc4596f3..58c4f867d 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -28,7 +28,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -41,7 +40,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -52,7 +50,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -60,14 +57,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/sambanova/run.yaml 
b/llama_stack/templates/sambanova/run.yaml index 6163a58b3..ab6c70ae0 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -23,7 +23,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -49,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 190030690..de8d35683 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -66,7 +66,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} provider_type: inline::sqlite-vec @@ -78,7 +77,6 @@ providers: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -111,7 +109,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite @@ -129,7 +126,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -137,14 +133,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 7914d4298..2a982bb62 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate: default_models = get_model_registry(available_models) - postgres_store = PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate: template_path=None, providers=providers, available_models_by_provider=available_models, - additional_pip_packages=postgres_store.pip_packages, + additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 7badff140..dceb13c8b 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( + LLAMA_STACK_RUN_CONFIG_VERSION, Api, BenchmarkInput, BuildConfig, @@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import ( 
ModelInput, Provider, ShieldInput, - StackRunConfig, ToolGroupInput, ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages def get_model_registry( @@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: KVStoreConfig | None = None - inference_store: SqlStoreConfig | None = None + metadata_store: dict | None = None + inference_store: dict | None = None def run_config( self, name: str, providers: dict[str, list[str]], container_image: str | None = None, - ) -> StackRunConfig: + ) -> dict: provider_registry = get_provider_registry() provider_configs = {} for api_str, provider_types in providers.items(): if api_providers := self.provider_overrides.get(api_str): - provider_configs[api_str] = api_providers + # Convert Provider objects to dicts for YAML serialization + provider_configs[api_str] = [ + p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers + ] continue provider_configs[api_str] = [] @@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel): provider_id=provider_id, provider_type=provider_type, config=config, - ) + ).model_dump(exclude_none=True) ) # Get unique set of APIs from providers apis = sorted(providers.keys()) - return StackRunConfig( - image_name=name, - container_image=container_image, - apis=apis, - providers=provider_configs, - metadata_store=self.metadata_store + # Return a dict that matches StackRunConfig structure + return { + "version": LLAMA_STACK_RUN_CONFIG_VERSION, + "image_name": name, + "container_image": container_image, + "apis": apis, + "providers": provider_configs, + "metadata_store": self.metadata_store or SqliteKVStoreConfig.sample_run_config( __distro_dir__=f"~/.llama/distributions/{name}", db_name="registry.db", ), - inference_store=self.inference_store + "inference_store": self.inference_store or SqliteSqlStoreConfig.sample_run_config( __distro_dir__=f"~/.llama/distributions/{name}", db_name="inference_store.db", ), - models=self.default_models or [], - shields=self.default_shields or [], - tool_groups=self.default_tool_groups or [], - datasets=self.default_datasets or [], - benchmarks=self.default_benchmarks or [], - ) + "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], + "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], + "vector_dbs": [], + "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])], + "scoring_fns": [], + "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])], + "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])], + "server": { + "port": 8321, + }, + } class 
DistributionTemplate(BaseModel): @@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel): # TODO: This is a hack to get the dependencies for internal APIs into build # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.inference_store: - additional_pip_packages.extend(run_config_.inference_store.pip_packages) - if run_config_.metadata_store: - additional_pip_packages.extend(run_config_.metadata_store.pip_packages) + + if run_config_.get("inference_store"): + additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) + + if run_config_.get("metadata_store"): + additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel): run_config = settings.run_config(self.name, self.providers, self.container_image) with open(yaml_output_dir / yaml_pth, "w") as f: yaml.safe_dump( - run_config.model_dump(exclude_none=True), + {k: v for k, v in run_config.items() if v is not None}, f, sort_keys=False, ) diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index c4f9ae7ef..c19b916d5 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 70e5872b3..f0197d74c 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 14f423855..b32c9ee8d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 38f1922c0..22c99f6cf 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6854ad05c..6d122e180 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -43,7 +42,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite @@ -61,7 +59,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -69,14 +66,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - 
provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 8b8fc09c4..d80ee6329 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic From f4950f4ef0965d21443660d0e3da1a853c30c197 Mon Sep 17 00:00:00 2001 From: Akram Ben Aissi Date: Thu, 3 Jul 2025 19:50:49 +0200 Subject: [PATCH 09/10] fix: AccessDeniedError leads to HTTP 500 instead of error 403 (#2595) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves access control error visibility issues where 500 errors were returned instead of proper 403 responses with actionable error messages. • Enhance AccessDeniedError with detailed context and improve exception handling • Enhanced AccessDeniedError class to include user, action, and resource context - Added constructor parameters for action, resource, and user - Generate detailed error messages showing user principal, attributes, and attempted resource - Backward compatible with existing usage (falls back to generic message) • Updated exception handling in server.py - Import AccessDeniedError from access_control module - Return proper 403 status codes with detailed error messages - Separate handling for PermissionError (generic) vs AccessDeniedError (detailed) • Enhanced error context at raise sites - Updated routing_tables/common.py to pass action, resource, and user context - Updated agents persistence to include context in access denied errors - Provides better debugging information for access control issues • Added comprehensive unit tests - Created tests/unit/server/test_server.py with 13 test cases - Covers AccessDeniedError with and without context - Tests all exception types (ValidationError, BadRequestError, AuthenticationRequiredError, etc.) - Validates proper HTTP status codes and error message formats # What does this PR do? 
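As an illustration of the enriched error, here is a minimal, self-contained sketch of the message format this PR's `_build_access_denied_message` produces. The `FakeUser` and `FakeResource` classes below are hypothetical stand-ins with the same shape as the real `User` and `ProtectedResource` types, not the llama_stack classes themselves:

```python
from dataclasses import dataclass, field


@dataclass
class FakeUser:  # hypothetical stand-in for llama_stack's User
    principal: str
    attributes: dict = field(default_factory=dict)


@dataclass
class FakeResource:  # hypothetical stand-in for a ProtectedResource
    type: str
    identifier: str


def build_access_denied_message(action, resource, user) -> str:
    """Mirrors the logic of _build_access_denied_message in this PR."""
    if action and resource and user:
        resource_info = f"{resource.type}::{resource.identifier}"
        user_info = f"'{user.principal}'"
        if user.attributes:
            attrs = ", ".join(f"{k}={v}" for k, v in user.attributes.items())
            user_info += f" (attributes: {attrs})"
        return f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
    return "Insufficient permissions"


user = FakeUser(principal="alice", attributes={"groups": ["users"]})
resource = FakeResource(type="vector_db", identifier="my_demo_vector_db")
print(build_access_denied_message("create", resource, user))
# User 'alice' (attributes: groups=['users']) cannot perform action 'create'
# on resource 'vector_db::my_demo_vector_db'
```

This is the level of detail the 403 response body now carries, instead of a generic 500 with a server-side traceback.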
## Test Plan ``` server: port: 8321 access_policy: - permit: principal: admin actions: [create, read, delete] when: user with admin in groups - permit: actions: [read] when: user with system:authenticated in roles ``` then: ``` curl --request POST --url http://localhost:8321/v1/vector-dbs \ --header "Authorization: Bearer your-bearer" \ --data '{ "vector_db_id": "my_demo_vector_db", "embedding_model": "ibm-granite/granite-embedding-125m-english", "embedding_dimension": 768, "provider_id": "milvus" }' ``` depending if user is in group admin or not, you should get the `AccessDeniedError`. Before this PR, this was leading to an error 500 and `Traceback` displayed in the logs. After the PR, logs display a simpler error (unless DEBUG logging is set) and a 403 Forbidden error is returned on the HTTP side. --------- Signed-off-by: Akram Ben Aissi <> --- .../access_control/access_control.py | 24 ++- .../distribution/routing_tables/common.py | 7 +- llama_stack/distribution/server/server.py | 9 +- .../agents/meta_reference/persistence.py | 2 +- tests/unit/fixtures.py | 8 +- .../agents/test_persistence_access_control.py | 3 +- .../providers/vector_io/test_sqlite_vec.py | 2 +- tests/unit/server/test_access_control.py | 5 +- tests/unit/server/test_server.py | 187 ++++++++++++++++++ 9 files changed, 232 insertions(+), 15 deletions(-) create mode 100644 tests/unit/server/test_server.py diff --git a/llama_stack/distribution/access_control/access_control.py b/llama_stack/distribution/access_control/access_control.py index 84d506d8f..075152ce4 100644 --- a/llama_stack/distribution/access_control/access_control.py +++ b/llama_stack/distribution/access_control/access_control.py @@ -106,4 +106,26 @@ def is_action_allowed( class AccessDeniedError(RuntimeError): - pass + def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None): + self.action = action + self.resource = resource + self.user = user + + message = _build_access_denied_message(action, resource, user) + super().__init__(message) + + +def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str: + """Build detailed error message for access denied scenarios.""" + if action and resource and user: + resource_info = f"{resource.type}::{resource.identifier}" + user_info = f"'{user.principal}'" + if user.attributes: + attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()]) + user_info += f" (attributes: {attrs})" + + message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'" + else: + message = "Insufficient permissions" + + return message diff --git a/llama_stack/distribution/routing_tables/common.py b/llama_stack/distribution/routing_tables/common.py index b79c8a2a8..7f7de32fe 100644 --- a/llama_stack/distribution/routing_tables/common.py +++ b/llama_stack/distribution/routing_tables/common.py @@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable): return obj async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: - if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()): - raise AccessDeniedError() + user = get_authenticated_user() + if not is_action_allowed(self.policy, "delete", obj, user): + raise AccessDeniedError("delete", obj, user) await self.dist_registry.delete(obj.type, obj.identifier) await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) @@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable): # If 
         creator = get_authenticated_user()
         if not is_action_allowed(self.policy, "create", obj, creator):
-            raise AccessDeniedError()
+            raise AccessDeniedError("create", obj, creator)
         if creator:
             obj.owner = creator
             logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 83407a25f..681ab320d 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -9,6 +9,7 @@ import asyncio
 import functools
 import inspect
 import json
+import logging
 import os
 import ssl
 import sys
@@ -31,6 +32,7 @@ from openai import BadRequestError
 from pydantic import BaseModel, ValidationError

 from llama_stack.apis.common.responses import PaginatedResponse
+from llama_stack.distribution.access_control.access_control import AccessDeniedError
 from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
 from llama_stack.distribution.distribution import builtin_automatically_routed_apis
 from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
@@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
         return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
     elif isinstance(exc, BadRequestError):
         return HTTPException(status_code=400, detail=str(exc))
-    elif isinstance(exc, PermissionError):
+    elif isinstance(exc, PermissionError | AccessDeniedError):
         return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
     elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
         return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
@@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
                     result.url = route
                 return result
             except Exception as e:
-                logger.exception(f"Error executing endpoint {route=} {method=}")
+                if logger.isEnabledFor(logging.DEBUG):
+                    logger.exception(f"Error executing endpoint {route=} {method=}")
+                else:
+                    logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
                 raise translate_exception(e) from e

     sig = inspect.signature(func)
diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py
index 717387008..cda535937 100644
--- a/llama_stack/providers/inline/agents/meta_reference/persistence.py
+++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py
@@ -53,7 +53,7 @@ class AgentPersistence:
             identifier=name,  # should this be qualified in any way?
         )
         if not is_action_allowed(self.policy, "create", session_info, user):
-            raise AccessDeniedError()
+            raise AccessDeniedError("create", session_info, user)

         await self.kvstore.set(
             key=f"session:{self.agent_id}:{session_id}",
diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py
index 7174d2e78..4e50c5e08 100644
--- a/tests/unit/fixtures.py
+++ b/tests/unit/fixtures.py
@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import pytest
+import pytest_asyncio

 from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def sqlite_kvstore(tmp_path):
     db_path = tmp_path / "test_kv.db"
     kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
     yield kvstore


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def disk_dist_registry(sqlite_kvstore):
     registry = DiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()
     yield registry


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def cached_disk_dist_registry(sqlite_kvstore):
     registry = CachedDiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()
diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py
index d5b876a09..656d1e53c 100644
--- a/tests/unit/providers/agents/test_persistence_access_control.py
+++ b/tests/unit/providers/agents/test_persistence_access_control.py
@@ -9,6 +9,7 @@ from datetime import datetime
 from unittest.mock import patch

 import pytest
+import pytest_asyncio

 from llama_stack.apis.agents import Turn
 from llama_stack.apis.inference import CompletionMessage, StopReason
@@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User
 from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup(sqlite_kvstore):
     agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
     yield agent_persistence
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index bbac717c7..5d9d92cf3 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
     assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!"


-@pytest.fixture(scope="session")
+@pytest_asyncio.fixture(scope="session")
 async def sqlite_vec_adapter(sqlite_connection):
     config = type("Config", (object,), {"db_path": ":memory:"})  # Mock config with in-memory database
     adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index f9ad47b0c..af03ddacb 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -7,6 +7,7 @@
 from unittest.mock import MagicMock, Mock, patch

 import pytest
+import pytest_asyncio
 import yaml
 from pydantic import TypeAdapter, ValidationError

@@ -26,7 +27,7 @@ def _return_model(model):
     return model


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()
@@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
     assert model.identifier == "auto-access-model"


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup_with_access_policy(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()
diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py
new file mode 100644
index 000000000..d17d58b8a
--- /dev/null
+++ b/tests/unit/server/test_server.py
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import Mock
+
+from fastapi import HTTPException
+from openai import BadRequestError
+from pydantic import ValidationError
+
+from llama_stack.distribution.access_control.access_control import AccessDeniedError
+from llama_stack.distribution.datatypes import AuthenticationRequiredError
+from llama_stack.distribution.server.server import translate_exception
+
+
+class TestTranslateException:
+    """Test cases for the translate_exception function."""
+
+    def test_translate_access_denied_error(self):
+        """Test that AccessDeniedError is translated to 403 HTTP status."""
+        exc = AccessDeniedError()
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 403
+        assert result.detail == "Permission denied: Insufficient permissions"
+
+    def test_translate_access_denied_error_with_context(self):
+        """Test that AccessDeniedError with context includes detailed information."""
+        from llama_stack.distribution.datatypes import User
+
+        # Create mock user and resource
+        user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
+
+        # Create a simple mock object that implements the ProtectedResource protocol
+        class MockResource:
+            def __init__(self, type: str, identifier: str, owner=None):
+                self.type = type
+                self.identifier = identifier
+                self.owner = owner
+
+        resource = MockResource("vector_db", "test-db")
+
+        exc = AccessDeniedError("create", resource, user)
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 403
+        assert "test-user" in result.detail
+        assert "vector_db::test-db" in result.detail
+        assert "create" in result.detail
+        assert "roles=['user']" in result.detail
+        assert "teams=['dev']" in result.detail
+
+    def test_translate_permission_error(self):
+        """Test that PermissionError is translated to 403 HTTP status."""
+        exc = PermissionError("Permission denied")
PermissionError("Permission denied") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert result.detail == "Permission denied: Permission denied" + + def test_translate_value_error(self): + """Test that ValueError is translated to 400 HTTP status.""" + exc = ValueError("Invalid input") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Invalid value: Invalid input" + + def test_translate_bad_request_error(self): + """Test that BadRequestError is translated to 400 HTTP status.""" + # Create a mock response for BadRequestError + mock_response = Mock() + mock_response.status_code = 400 + mock_response.headers = {} + + exc = BadRequestError("Bad request", response=mock_response, body="Bad request") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Bad request" + + def test_translate_authentication_required_error(self): + """Test that AuthenticationRequiredError is translated to 401 HTTP status.""" + exc = AuthenticationRequiredError("Authentication required") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 401 + assert result.detail == "Authentication required: Authentication required" + + def test_translate_timeout_error(self): + """Test that TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError("Operation timed out") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: Operation timed out" + + def test_translate_asyncio_timeout_error(self): + """Test that asyncio.TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError() + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: " + + def test_translate_not_implemented_error(self): + """Test that NotImplementedError is translated to 501 HTTP status.""" + exc = NotImplementedError("Not implemented") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 501 + assert result.detail == "Not implemented: Not implemented" + + def test_translate_validation_error(self): + """Test that ValidationError is translated to 400 HTTP status with proper format.""" + # Create a mock validation error using proper Pydantic error format + exc = ValidationError.from_exception_data( + "TestModel", + [ + { + "loc": ("field", "nested"), + "msg": "field required", + "type": "missing", + } + ], + ) + + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert "errors" in result.detail + assert len(result.detail["errors"]) == 1 + assert result.detail["errors"][0]["loc"] == ["field", "nested"] + assert result.detail["errors"][0]["msg"] == "Field required" + assert result.detail["errors"][0]["type"] == "missing" + + def test_translate_generic_exception(self): + """Test that generic exceptions are translated to 500 HTTP status.""" + exc = Exception("Unexpected error") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 500 + assert result.detail == "Internal server error: An unexpected error occurred." 
+
+    def test_translate_runtime_error(self):
+        """Test that RuntimeError is translated to 500 HTTP status."""
+        exc = RuntimeError("Runtime error")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 500
+        assert result.detail == "Internal server error: An unexpected error occurred."
+
+    def test_multiple_access_denied_scenarios(self):
+        """Test various scenarios that should result in 403 status codes."""
+        # Test AccessDeniedError (uses enhanced message)
+        exc1 = AccessDeniedError()
+        result1 = translate_exception(exc1)
+        assert isinstance(result1, HTTPException)
+        assert result1.status_code == 403
+        assert result1.detail == "Permission denied: Insufficient permissions"
+
+        # Test PermissionError (uses generic message)
+        exc2 = PermissionError("No permission")
+        result2 = translate_exception(exc2)
+        assert isinstance(result2, HTTPException)
+        assert result2.status_code == 403
+        assert result2.detail == "Permission denied: No permission"
+
+        exc3 = PermissionError("Access denied")
+        result3 = translate_exception(exc3)
+        assert isinstance(result3, HTTPException)
+        assert result3.status_code == 403
+        assert result3.detail == "Permission denied: Access denied"

From dae1fcd3c2bb0b440f29c2f7291a5d47c81b4051 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Thu, 3 Jul 2025 19:51:46 +0200
Subject: [PATCH 10/10] ci: let pytest run the distro server (#2586)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

* Use the #2580 functionality to auto-start the server with the tests
* Reduce the server readiness timeout to 30 seconds
* Print server logs on errors
* Collect pytest logs into a per-suite log file (pytest-<test-type>.log)

Signed-off-by: Sébastien Han
---
 .github/workflows/integration-tests.yml | 41 +++---------------
 tests/integration/fixtures/common.py    | 57 ++++++++++++++++++++++---
 2 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 32e221128..0dc7a9889 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -25,7 +25,7 @@ jobs:
         # Listing tests manually since some of them currently fail
         # TODO: generate matrix list from tests/integration when fixed
         test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io]
-        client-type: [library, http]
+        client-type: [library, server]
         python-version: ["3.12", "3.13"]
       fail-fast: false # we want to run all tests regardless of failure

@@ -45,39 +45,6 @@ jobs:
         run: |
           uv run llama stack build --template ollama --image-type venv

-      - name: Start Llama Stack server in background
-        if: matrix.client-type == 'http'
-        env:
-          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
-        run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
-
-      - name: Wait for Llama Stack server to be ready
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Waiting for Llama Stack server..."
-          for i in {1..30}; do
-            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
-              echo "Llama Stack server is up!"
-              exit 0
-            fi
-            sleep 1
-          done
-          echo "Llama Stack server failed to start"
-          cat server.log
-          exit 1
-
-      - name: Verify Ollama status is OK
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Verifying Ollama status..."
-          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
-          echo "Ollama status: $ollama_status"
-          if [ "$ollama_status" != "OK" ]; then
-            echo "Ollama health check failed"
-            exit 1
-          fi
-
       - name: Check Storage and Memory Available Before Tests
         if: ${{ always() }}
         run: |
@@ -92,12 +59,14 @@ jobs:
           if [ "${{ matrix.client-type }}" == "library" ]; then
             stack_config="ollama"
           else
-            stack_config="http://localhost:8321"
+            stack_config="server:ollama"
           fi
           uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
             -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
-            --embedding-model=all-MiniLM-L6-v2
+            --embedding-model=all-MiniLM-L6-v2 \
+            --color=yes \
+            --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log

       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 2d6092e44..ecd29484b 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -37,26 +37,42 @@ def is_port_available(port: int, host: str = "localhost") -> bool:

 def start_llama_stack_server(config_name: str) -> subprocess.Popen:
     """Start a llama stack server with the given config."""
     cmd = ["llama", "stack", "run", config_name]
-
-    # Start server in background
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+    devnull = open(os.devnull, "w")
+    process = subprocess.Popen(
+        cmd,
+        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
+        stderr=devnull,  # redirect stderr to devnull to prevent deadlock
+        text=True,
+        env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
+    )
     return process


-def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool:
+def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
     """Wait for the server to be ready by polling the health endpoint."""
     health_url = f"{base_url}/v1/health"
     start_time = time.time()

     while time.time() - start_time < timeout:
+        if process and process.poll() is not None:
+            print(f"Server process terminated with return code: {process.returncode}")
+            return False
+
         try:
             response = requests.get(health_url, timeout=5)
             if response.status_code == 200:
                 return True
         except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
             pass
+
+        # Print progress every 5 seconds
+        elapsed = time.time() - start_time
+        if int(elapsed) % 5 == 0 and elapsed > 0:
+            print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
+
         time.sleep(0.5)

+    print(f"Server failed to respond within {timeout} seconds")
     return False

@@ -179,11 +195,12 @@ def llama_stack_client(request, provider_data):
         server_process = start_llama_stack_server(config_name)

         # Wait for server to be ready
-        if not wait_for_server_ready(base_url, timeout=120):
+        if not wait_for_server_ready(base_url, timeout=30, process=server_process):
             print("Server failed to start within timeout")
             server_process.terminate()
             raise RuntimeError(
-                f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid."
+                f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
+                f"See server.log for details."
             )

         print(f"Server is ready at {base_url}")
@@ -227,3 +244,31 @@ def llama_stack_client(request, provider_data):
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
     return OpenAI(base_url=base_url, api_key="fake")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_server_process(request):
+    """Cleanup server process at the end of the test session."""
+    yield  # Run tests
+
+    if hasattr(request.session, "_llama_stack_server_process"):
+        server_process = request.session._llama_stack_server_process
+        if server_process:
+            if server_process.poll() is None:
+                print("Terminating llama stack server process...")
+            else:
+                print(f"Server process already terminated with return code: {server_process.returncode}")
+                return
+            try:
+                server_process.terminate()
+                server_process.wait(timeout=10)
+                print("Server process terminated gracefully")
+            except subprocess.TimeoutExpired:
+                print("Server process did not terminate gracefully, killing it")
+                server_process.kill()
+                server_process.wait()
+                print("Server process killed")
+            except Exception as e:
+                print(f"Error during server cleanup: {e}")
+        else:
+            print("Server process not found - won't be able to cleanup")
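Taken together, the helpers and fixture in this patch implement a start-or-reuse flow: check the port, launch the server only if the port is free, poll `/v1/health`, and leave teardown to the session-scoped cleanup fixture. A condensed sketch of that control flow, assuming the three helpers defined in this diff are importable (`ensure_server` itself is hypothetical glue, not part of the patch):

```python
import subprocess

# Assumed to come from tests/integration/fixtures/common.py as patched above:
# is_port_available, start_llama_stack_server, wait_for_server_ready


def ensure_server(config_name: str, port: int = 8321) -> subprocess.Popen | None:
    """Hypothetical wrapper: start the stack server only if the port is free."""
    base_url = f"http://localhost:{port}"
    if not is_port_available(port):
        return None  # something is already listening on the port; reuse it
    process = start_llama_stack_server(config_name)
    if not wait_for_server_ready(base_url, timeout=30, process=process):
        process.terminate()
        raise RuntimeError(f"Server '{config_name}' failed to start; see server.log")
    return process  # caller (the session-scoped fixture) is responsible for cleanup
```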