From fa0b0c13d41a6125b743c774c8870a760a9d2ebe Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Wed, 25 Jun 2025 09:54:00 -0400 Subject: [PATCH 1/8] fix: Ollama should be optional in starter distro (#2482) # What does this PR do? Our starter distro required Ollama to be running (and a large list of models available in that Ollama) to successfully start. This adjusts things so that Ollama does not have to be running to use the starter template / distro. To accomplish this, a few changes were needed: * The Ollama provider is now configurable as to whether it raises an Exception or just logs a warning when it cannot reach the Ollama server on startup. The default is to raise an exception (same as previous behavior), but in the starter template we adjust this to just log a warning so that we can bring the stack up without needing a running Ollama server. * The starter template no longer specifies a default list of models for Ollama, as any models specified there need to actually be pulled and available in Ollama. Instead, it adds a new `OLLAMA_INFERENCE_MODEL` environment variable where users can provide an optional model to register with the Ollama provider on startup. Additional models can also be registered via the typical `models.register(...)` at runtime. * The vLLM template was adjusted to also allow an optional `VLLM_INFERENCE_MODEL` specified on startup, so that the behavior between vLLM and Ollama is consistent, making it easy to get up and running quickly. * The default vector store was changed from sqlite-vec to faiss. sqlite-vec can be enabled by setting the `ENABLE_SQLITE_VEC` environment variable, like we do for chromadb and pgvector. This is because sqlite-vec does not ship proper arm64 binaries, an issue we previously fixed in #1530 for the ollama distribution. ## Test Plan With this change, the following scenarios now work with the starter template that did not before: * no Ollama running * Ollama running but not all of the Llama models pulled locally * Ollama running with a custom model registered on startup * vLLM running with a custom model registered on startup * running the starter template on linux/arm64, like when running containers on Mac without Rosetta emulation --------- Signed-off-by: Ben Browning --- docs/source/distributions/configuration.md | 12 ++ llama_stack/distribution/stack.py | 9 + .../remote/inference/ollama/__init__.py | 2 +- .../remote/inference/ollama/config.py | 10 +- .../remote/inference/ollama/ollama.py | 22 ++- .../providers/remote/inference/vllm/vllm.py | 9 +- .../templates/ollama/run-with-safety.yaml | 1 + llama_stack/templates/ollama/run.yaml | 1 + llama_stack/templates/starter/run.yaml | 170 ++---------------- llama_stack/templates/starter/starter.py | 58 +++++- 10 files changed, 121 insertions(+), 173 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index dd73d93ea..4bc9b37e4 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -109,6 +109,18 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions.
When omitted, the server will set `provider_model_id` to be the same as `model_id`. +If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: + +```yaml +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL:__disabled__} + provider_id: ollama + provider_model_id: ${env.INFERENCE_MODEL:__disabled__} +``` + +The snippet above will only register this model if the environment variable `INFERENCE_MODEL` is set and non-empty. If the environment variable is not set, the model will not get registered at all. + ## Server Configuration The `server` section configures the HTTP server that serves the Llama Stack APIs: diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 5a9708497..b33b0d3f7 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): method = getattr(impls[api], register_method) for obj in objects: + # In complex templates, like our starter template, we may have dynamic model ids + # given by environment variables. This allows those environment variables to have + # a default value of __disabled__ to skip registration of the model if not set. + if ( + hasattr(obj, "provider_model_id") + and obj.provider_model_id is not None + and "__disabled__" in obj.provider_model_id + ): + continue # we want to maintain the type information in arguments to method. # instead of method(**obj.model_dump()), which may convert a typed attr to a dict, # we use model_dump() to find all the attrs and then getattr to get the still typed value. 
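To make the `__disabled__` mechanism above concrete, here is a minimal sketch of the substitution-plus-skip behavior; `substitute_env` is an illustrative stand-in for Llama Stack's actual `${env.VAR:default}` handling, not the real helper:

```python
import os

def substitute_env(template: str) -> str:
    # Illustrative only: resolve "${env.VAR:default}" to the variable's value,
    # falling back to the default when the variable is unset or empty.
    if template.startswith("${env.") and template.endswith("}"):
        name, _, default = template[6:-1].partition(":")
        return os.environ.get(name) or default
    return template

# Mirrors the registration loop above: any model whose provider_model_id still
# contains the __disabled__ sentinel after substitution is skipped entirely.
provider_model_id = substitute_env("${env.OLLAMA_INFERENCE_MODEL:__disabled__}")
if "__disabled__" in provider_model_id:
    print("OLLAMA_INFERENCE_MODEL not set; skipping registration")
else:
    print(f"registering {provider_model_id}")
```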
diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/llama_stack/providers/remote/inference/ollama/__init__.py index 073c31cde..491339451 100644 --- a/llama_stack/providers/remote/inference/ollama/__init__.py +++ b/llama_stack/providers/remote/inference/ollama/__init__.py @@ -10,6 +10,6 @@ from .config import OllamaImplConfig async def get_adapter_impl(config: OllamaImplConfig, _deps): from .ollama import OllamaInferenceAdapter - impl = OllamaInferenceAdapter(config.url) + impl = OllamaInferenceAdapter(config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 0e4aef0e1..37b827f4f 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL + raise_on_connect_error: bool = True @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]: - return {"url": url} + def sample_run_config( + cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + ) -> dict[str, Any]: + return { + "url": url, + "raise_on_connect_error": raise_on_connect_error, + } diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d51072fbf..2f51920b5 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -9,7 +9,6 @@ import uuid from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -import httpx from ollama import AsyncClient # type: ignore[attr-defined] from openai import AsyncOpenAI @@ -57,6 +56,7 @@ from llama_stack.providers.datatypes import ( HealthStatus, ModelsProtocolPrivate, ) +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -90,9 +90,10 @@ class OllamaInferenceAdapter( InferenceProvider, ModelsProtocolPrivate, ): - def __init__(self, url: str) -> None: + def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) - self.url = url + self.url = config.url + self.raise_on_connect_error = config.raise_on_connect_error @property def client(self) -> AsyncClient: @@ -103,8 +104,13 @@ class OllamaInferenceAdapter( return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama") async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.url}`...") - await self.health() + logger.debug(f"checking connectivity to Ollama at `{self.url}`...") + health_response = await self.health() + if health_response["status"] == HealthStatus.ERROR: + if self.raise_on_connect_error: + raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") + else: + logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal") async def health(self) -> HealthResponse: """ @@ -117,10 +123,8 @@ class OllamaInferenceAdapter( try: await self.client.ps() return HealthResponse(status=HealthStatus.OK) - except httpx.ConnectError as e: - raise RuntimeError( - "Ollama Server is not running, start it using `ollama serve` in a separate terminal" - ) 
from e + except Exception as e: + return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 3424be6b4..ae04f206a 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator from typing import Any import httpx -from openai import AsyncOpenAI +from openai import APIConnectionError, AsyncOpenAI from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -461,7 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): model = await self.register_helper.register_model(model) except ValueError: pass # Ignore statically unknown model, will check live listing - res = await client.models.list() + try: + res = await client.models.list() + except APIConnectionError as e: + raise ValueError( + f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL." + ) from e available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 85d5c813b..2e1b7fdcc 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -18,6 +18,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 2d10a99a4..8c2b17ef1 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -18,6 +18,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 960e96d01..30df39e5d 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -31,6 +31,7 @@ providers: provider_type: remote::ollama config: url: ${env.OLLAMA_URL:http://localhost:11434} + raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: @@ -60,7 +61,14 @@ providers: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: sqlite-vec + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec} provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db @@ -530,160 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/llama3.1:8b-instruct-fp16 + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__} provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-8B-Instruct - provider_id: ollama - 
provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:8b - provider_id: ollama - provider_model_id: llama3.1:8b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-70B-Instruct - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b - provider_id: ollama - provider_model_id: llama3.1:70b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b - provider_id: ollama - provider_model_id: llama3.1:405b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-1B-Instruct - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b - provider_id: ollama - provider_model_id: llama3.2:1b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-3B-Instruct - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b - provider_id: ollama - provider_model_id: llama3.2:3b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:11b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:latest - provider_id: ollama - provider_model_id: llama3.2-vision:latest - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b - provider_id: ollama - provider_model_id: llama3.2-vision:90b - model_type: llm -- metadata: {} - model_id: ollama/llama3.3:70b - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.3-70B-Instruct - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:8b - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-8B - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:1b - provider_id: ollama - provider_model_id: llama-guard3:1b - model_type: llm -- metadata: {} - model_id: 
ollama/meta-llama/Llama-Guard-3-1B - provider_id: ollama - provider_model_id: llama-guard3:1b + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__} model_type: llm - metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm:latest + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__} provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ollama/nomic-embed-text - provider_id: ollama - provider_model_id: nomic-embed-text + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -938,6 +801,11 @@ models: provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__} + provider_id: vllm + provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__} + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 2a44a0a37..ec01d08e9 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -16,6 +16,7 @@ from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplCo from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, ) @@ -36,9 +37,6 @@ from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.remote.inference.ollama.models import ( - MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES, -) from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, @@ -85,8 +83,22 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ), ( "ollama", - OLLAMA_MODEL_ENTRIES, - OllamaImplConfig.sample_run_config(), + [ + ProviderModelEntry( + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}", + model_type=ModelType.llm, + ), + ProviderModelEntry( + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}", + }, + ), + ], + OllamaImplConfig.sample_run_config( + url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False + ), ), ( "anthropic", @@ -110,7 +122,12 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ), ( "vllm", - [], + [ + ProviderModelEntry( + provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}", + model_type=ModelType.llm, + ), + ], VLLMInferenceAdapterConfig.sample_run_config( url="${env.VLLM_URL:http://localhost:8000/v1}", ), @@ -153,7 +170,12 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - 
provider_id="sqlite-vec", + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), @@ -257,7 +279,27 @@ def get_distribution_template() -> DistributionTemplate: ), "VLLM_URL": ( "http://localhost:8000/v1", - "VLLM URL", + "vLLM URL", + ), + "VLLM_INFERENCE_MODEL": ( + "", + "Optional vLLM Inference Model to register on startup", + ), + "OLLAMA_URL": ( + "http://localhost:11434", + "Ollama URL", + ), + "OLLAMA_INFERENCE_MODEL": ( + "", + "Optional Ollama Inference Model to register on startup", + ), + "OLLAMA_EMBEDDING_MODEL": ( + "", + "Optional Ollama Embedding Model to register on startup", + ), + "OLLAMA_EMBEDDING_DIMENSION": ( + "384", + "Ollama Embedding Dimension", ), }, ) From 82f13fe83e5c9c802595b05fb2bc2ef4ddedbe3c Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 25 Jun 2025 13:55:23 -0600 Subject: [PATCH 2/8] feat: Add ChunkMetadata to Chunk (#2497) # What does this PR do? Adding `ChunkMetadata` so we can properly delete embeddings later. More specifically, this PR refactors and extends the chunk metadata handling in the vector database and introduces a distinction between metadata used for model context and backend-only metadata required for chunk management, storage, and retrieval. It also improves chunk ID generation and propagation throughout the stack, enhances test coverage, and adds new utility modules. ```python class ChunkMetadata(BaseModel): """ `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will NOT be inserted into the context during inference, but is required for backend functionality. Use `metadata` in `Chunk` for metadata that will be used during inference. """ document_id: str | None = None chunk_id: str | None = None source: str | None = None created_timestamp: int | None = None updated_timestamp: int | None = None chunk_window: str | None = None chunk_tokenizer: str | None = None chunk_embedding_model: str | None = None chunk_embedding_dimension: int | None = None content_token_count: int | None = None metadata_token_count: int | None = None ``` Eventually we can migrate the document_id out of the `metadata` field. I've introduced the changes so that `ChunkMetadata` is backwards compatible with `metadata`. 
Closes https://github.com/meta-llama/llama-stack/issues/2501 ## Test Plan Added unit tests --------- Signed-off-by: Francisco Javier Arceo --- docs/_static/llama-stack-spec.html | 205 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 171 +++++++++------ llama_stack/apis/vector_io/vector_io.py | 62 +++++- .../inline/tool_runtime/rag/memory.py | 21 +- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 35 +-- .../remote/vector_io/qdrant/qdrant.py | 4 +- .../providers/utils/memory/vector_store.py | 24 +- .../providers/utils/vector_io/__init__.py | 5 + .../providers/utils/vector_io/chunk_utils.py | 14 ++ tests/unit/providers/vector_io/conftest.py | 16 +- .../providers/vector_io/test_chunk_utils.py | 66 ++++++ tests/unit/providers/vector_io/test_qdrant.py | 2 +- .../providers/vector_io/test_sqlite_vec.py | 38 ++-- tests/unit/rag/test_rag_query.py | 45 +++- 14 files changed, 490 insertions(+), 218 deletions(-) create mode 100644 llama_stack/providers/utils/vector_io/__init__.py create mode 100644 llama_stack/providers/utils/vector_io/chunk_utils.py create mode 100644 tests/unit/providers/vector_io/test_chunk_utils.py diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index affc426d6..801e8dc33 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -11190,6 +11190,115 @@ ], "title": "InsertRequest" }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." 
+ }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, "InsertChunksRequest": { "type": "object", "properties": { @@ -11200,53 +11309,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" }, "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." }, @@ -14671,53 +14734,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. 
If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" } }, "scores": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 1e1293dc2..b736cd904 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -7867,6 +7867,107 @@ components: - vector_db_id - chunk_size_in_tokens title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not + expected to change after. 
Use `Chunk.metadata` for metadata that will + be used in the context during inference. InsertChunksRequest: type: object properties: @@ -7877,40 +7978,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' description: >- The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` @@ -10231,40 +10299,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' scores: type: array items: diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index d6de0108c..2d4131315 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -8,6 +8,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -15,21 +16,80 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.strong_typing.schema import register_schema +@json_schema_type +class ChunkMetadata(BaseModel): + """ + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. 
+ :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. + :param document_id: The ID of the document this chunk belongs to. + :param source: The source of the content, such as a URL, file path, or other identifier. + :param created_timestamp: An optional timestamp indicating when the chunk was created. + :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. + :param chunk_window: The window of the chunk, which can be used to group related chunks together. + :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. + :param chunk_embedding_model: The embedding model used to create the chunk's embedding. + :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk. + :param content_token_count: The number of tokens in the content of the chunk. + :param metadata_token_count: The number of tokens in the metadata of the chunk. + """ + + chunk_id: str | None = None + document_id: str | None = None + source: str | None = None + created_timestamp: int | None = None + updated_timestamp: int | None = None + chunk_window: str | None = None + chunk_tokenizer: str | None = None + chunk_embedding_model: str | None = None + chunk_embedding_dimension: int | None = None + content_token_count: int | None = None + metadata_token_count: int | None = None + + +@json_schema_type class Chunk(BaseModel): """ A chunk of content that can be inserted into a vector database. :param content: The content of the chunk, which can be interleaved text, images, or other types. :param embedding: Optional embedding for the chunk. If not provided, it will be computed later. - :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information. + :param metadata: Metadata associated with the chunk that will be used in the model context during inference. + :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. + :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. 
""" content: InterleavedContent metadata: dict[str, Any] = Field(default_factory=dict) embedding: list[float] | None = None + # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id" + stored_chunk_id: str | None = Field(default=None, alias="chunk_id") + chunk_metadata: ChunkMetadata | None = None + + model_config = {"populate_by_name": True} + + def model_post_init(self, __context): + # Extract chunk_id from metadata if present + if self.metadata and "chunk_id" in self.metadata: + self.stored_chunk_id = self.metadata.pop("chunk_id") + + @property + def chunk_id(self) -> str: + """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set.""" + if self.stored_chunk_id: + return self.stored_chunk_id + + if "document_id" in self.metadata: + return generate_chunk_id(self.metadata["document_id"], str(self.content)) + + return generate_chunk_id(str(uuid.uuid4()), str(self.content)) @json_schema_type diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 7f4fe5dbd..6a7c7885c 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti chunks = [] for doc in documents: content = await content_from_doc(doc) + # TODO: we should add enrichment here as URLs won't be added to the metadata by default chunks.extend( make_overlapped_chunks( doc.document_id, @@ -157,8 +158,24 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ) break - metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]} - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset) + # Add useful keys from chunk_metadata to metadata and remove some from metadata + chunk_metadata_keys_to_include_from_context = [ + "chunk_id", + "document_id", + "source", + ] + metadata_keys_to_exclude_from_context = [ + "token_count", + "metadata_token_count", + ] + metadata_for_context = {} + for k in chunk_metadata_keys_to_include_from_context: + metadata_for_context[k] = getattr(chunk.chunk_metadata, k) + for k in metadata: + if k not in metadata_keys_to_exclude_from_context: + metadata_for_context[k] = metadata[k] + + text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context) picked.append(TextContentItem(text=text_content)) picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index d832e56f5..3b3c5f486 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -5,12 +5,10 @@ # the root directory of this source tree. 
import asyncio -import hashlib import json import logging import sqlite3 import struct -import uuid from typing import Any import numpy as np @@ -201,10 +199,7 @@ class SQLiteVecIndex(EmbeddingIndex): batch_embeddings = embeddings[i : i + batch_size] # Insert metadata - metadata_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json()) - for chunk in batch_chunks - ] + metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks] cur.executemany( f""" INSERT INTO {self.metadata_table} (id, chunk) @@ -218,7 +213,7 @@ class SQLiteVecIndex(EmbeddingIndex): embedding_data = [ ( ( - generate_chunk_id(chunk.metadata["document_id"], chunk.content), + chunk.chunk_id, serialize_vector(emb.tolist()), ) ) @@ -230,10 +225,7 @@ class SQLiteVecIndex(EmbeddingIndex): ) # Insert FTS content - fts_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content) - for chunk in batch_chunks - ] + fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) cur.executemany( f"DELETE FROM {self.fts_table} WHERE id = ?;", @@ -381,13 +373,12 @@ class SQLiteVecIndex(EmbeddingIndex): vector_response = await self.query_vector(embedding, k, score_threshold) keyword_response = await self.query_keyword(query_string, k, score_threshold) - # Convert responses to score dictionaries using generate_chunk_id + # Convert responses to score dictionaries using chunk_id vector_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score - for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) } keyword_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score + chunk.chunk_id: score for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) } @@ -408,13 +399,7 @@ class SQLiteVecIndex(EmbeddingIndex): filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] # Create a map of chunk_id to chunk for both responses - chunk_map = {} - for c in vector_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c - for c in keyword_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} # Use the map to look up chunks by their IDs chunks = [] @@ -757,9 +742,3 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not found") return await self.cache[vector_db_id].query_chunks(query, params) - - -def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index e9d6eec22..09ea08fa0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -70,8 +70,8 @@ class QdrantIndex(EmbeddingIndex): ) points = 
[] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): - chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}" + for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): + chunk_id = chunk.chunk_id points.append( PointStruct( id=convert_id(chunk_id), diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index a6e420feb..ab204a75a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -7,6 +7,7 @@ import base64 import io import logging import re +import time from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any @@ -23,12 +24,13 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id log = logging.getLogger(__name__) @@ -148,6 +150,7 @@ async def content_from_doc(doc: RAGDocument) -> str: def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: + default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER" tokenizer = Tokenizer.get_instance() tokens = tokenizer.encode(text, bos=False, eos=False) try: @@ -161,16 +164,32 @@ def make_overlapped_chunks( for i in range(0, len(tokens), window_len - overlap_len): toks = tokens[i : i + window_len] chunk = tokenizer.decode(toks) + chunk_id = generate_chunk_id(chunk, text) chunk_metadata = metadata.copy() + chunk_metadata["chunk_id"] = chunk_id chunk_metadata["document_id"] = document_id chunk_metadata["token_count"] = len(toks) chunk_metadata["metadata_token_count"] = len(metadata_tokens) + backend_chunk_metadata = ChunkMetadata( + chunk_id=chunk_id, + document_id=document_id, + source=metadata.get("source", None), + created_timestamp=metadata.get("created_timestamp", int(time.time())), + updated_timestamp=int(time.time()), + chunk_window=f"{i}-{i + len(toks)}", + chunk_tokenizer=default_tokenizer, + chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + content_token_count=len(toks), + metadata_token_count=len(metadata_tokens), + ) + # chunk is a string chunks.append( Chunk( content=chunk, metadata=chunk_metadata, + chunk_metadata=backend_chunk_metadata, ) ) @@ -237,6 +256,9 @@ class VectorDBWithIndex: for i, c in enumerate(chunks): if c.embedding is None: chunks_to_embed.append(c) + if c.chunk_metadata: + c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension else: _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/llama_stack/providers/utils/vector_io/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py new file mode 100644 index 000000000..68cf11cad --- /dev/null +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import hashlib +import uuid + + +def generate_chunk_id(document_id: str, chunk_text: str) -> str: + """Generate a unique chunk ID using a hash of document ID and chunk text.""" + hash_input = f"{document_id}:{chunk_text}".encode() + return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 3bcd0613f..5eaca8a25 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,7 @@ import random import numpy as np import pytest -from llama_stack.apis.vector_io import Chunk +from llama_stack.apis.vector_io import Chunk, ChunkMetadata EMBEDDING_DIMENSION = 384 @@ -33,6 +33,20 @@ def sample_chunks(): for j in range(k) for i in range(n) ] + sample.extend( + [ + Chunk( + content=f"Sentence {i} from document {j + k}", + chunk_metadata=ChunkMetadata( + document_id=f"document-{j + k}", + chunk_id=f"document-{j}-chunk-{i}", + source=f"example source-{j + k}-{i}", + ), + ) + for j in range(k) + for i in range(n) + ] + ) return sample diff --git a/tests/unit/providers/vector_io/test_chunk_utils.py b/tests/unit/providers/vector_io/test_chunk_utils.py new file mode 100644 index 000000000..941928b6d --- /dev/null +++ b/tests/unit/providers/vector_io/test_chunk_utils.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id + +# This test is a unit test for the chunk_utils.py helpers. This should only contain +# tests which are specific to this file. 
More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_chunk_utils.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +def test_generate_chunk_id(): + chunks = [ + Chunk(content="test", metadata={"document_id": "doc-1"}), + Chunk(content="test ", metadata={"document_id": "doc-1"}), + Chunk(content="test 3", metadata={"document_id": "doc-1"}), + ] + + chunk_ids = sorted([chunk.chunk_id for chunk in chunks]) + assert chunk_ids == [ + "177a1368-f6a8-0c50-6e92-18677f2c3de3", + "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", + "f68df25d-d9aa-ab4d-5684-64a233add20d", + ] + + +def test_chunk_id(): + # Test with existing chunk ID + chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"}) + assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350" + + # Test with document ID in metadata + chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"}) + assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test") + + # Test chunks with ChunkMetadata + chunk_with_metadata = Chunk( + content="test", + metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}, + chunk_metadata=ChunkMetadata(document_id="document_1"), + ) + assert chunk_with_metadata.chunk_id == "chunk-id-1" + + # Test with no ID or document ID + chunk_without_id = Chunk(content="test") + generated_id = chunk_without_id.chunk_id + assert isinstance(generated_id, str) and len(generated_id) == 36 # Should be a valid UUID + + +def test_stored_chunk_id_alias(): + # Test with existing chunk ID alias + chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}) + assert chunk_with_alias.chunk_id == "chunk-id-1" + serialized_chunk = chunk_with_alias.model_dump() + assert serialized_chunk["stored_chunk_id"] == "chunk-id-1" + # showing chunk_id is not serialized (i.e., a computed field) + assert "chunk_id" not in serialized_chunk + assert chunk_with_alias.stored_chunk_id == "chunk-id-1" diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 607eccb24..6902c8850 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -81,7 +81,7 @@ __QUERY = "Sample query" @pytest.mark.asyncio -@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, vector_db_id, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 6424b9e86..bbac717c7 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -15,7 +15,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, - generate_chunk_id, ) # This test is a unit test for the SQLiteVecVectorIOAdapter class. 
This should only contain @@ -65,6 +64,14 @@ async def test_query_chunks_vector(sqlite_vec_index, sample_chunks, sample_embed assert len(response.chunks) == 2 +@pytest.mark.xfail(reason="Chunk Metadata not yet supported for SQLite-vec", strict=True) +async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks, sample_embeddings): + await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) + query_embedding = sample_embeddings[0] + response = await sqlite_vec_index.query_vector(query_embedding, k=2, score_threshold=0.0) + assert response.chunks[-1].chunk_metadata == sample_chunks[-1].chunk_metadata + + @pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -150,21 +157,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -def test_generate_chunk_id(): - chunks = [ - Chunk(content="test", metadata={"document_id": "doc-1"}), - Chunk(content="test ", metadata={"document_id": "doc-1"}), - Chunk(content="test 3", metadata={"document_id": "doc-1"}), - ] - - chunk_ids = sorted([generate_chunk_id(chunk.metadata["document_id"], chunk.content) for chunk in chunks]) - assert chunk_ids == [ - "177a1368-f6a8-0c50-6e92-18677f2c3de3", - "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", - "f68df25d-d9aa-ab4d-5684-64a233add20d", - ] - - @pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" @@ -339,7 +331,7 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks # Verify scores are in descending order assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) # Verify we get results from both the vector-similar document and keyword-matched document - doc_ids = {chunk.metadata["document_id"] for chunk in response.chunks} + doc_ids = {chunk.metadata.get("document_id") or chunk.chunk_metadata.document_id for chunk in response.chunks} assert "document-0" in doc_ids # From vector search assert "document-2" in doc_ids # From keyword search @@ -364,7 +356,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 1.0}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) # alpha=0.0 (should behave like pure vector) response = await sqlite_vec_index.query_hybrid( @@ -389,7 +385,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 0.7}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) @pytest.mark.asyncio diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index b9fd8cca4..d2dd1783b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -4,10 +4,15 @@ # This source code is licensed under the terms described in the 
LICENSE file in # the root directory of this source tree. -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack.apis.vector_io import ( + Chunk, + ChunkMetadata, + QueryChunksResponse, +) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl @@ -17,3 +22,41 @@ class TestRagQuery: rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) + + @pytest.mark.asyncio + async def test_query_chunk_metadata_handling(self): + rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) + content = "test query content" + vector_db_ids = ["db1"] + + chunk_metadata = ChunkMetadata( + document_id="doc1", + chunk_id="chunk1", + source="test_source", + metadata_token_count=5, + ) + interleaved_content = MagicMock() + chunk = Chunk( + content=interleaved_content, + metadata={ + "key1": "value1", + "token_count": 10, + "metadata_token_count": 5, + # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert() + "document_id": "doc1", + }, + stored_chunk_id="chunk1", + chunk_metadata=chunk_metadata, + ) + + query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0]) + + rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response) + result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids) + + assert result is not None + expected_metadata_string = ( + "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}" + ) + assert expected_metadata_string in result.content[1].text + assert result.content is not None From 1d3f27fe5b359b8a6fde7741f72a6593ac0f0774 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Wed, 25 Jun 2025 14:43:37 -0700 Subject: [PATCH 3/8] fix: resume responses with tool call output (#2524) # What does this PR do? 
Closes #2522.

## Test Plan

Added an integration test:

```
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -v tests/integration/agents/test_openai_responses.py --text-model "accounts/fireworks/models/llama-v3p3-70b-instruct" -vv -k 'function_call'
```
---
 .../utils/inference/openai_compat.py          |  4 +-
 .../agents/test_openai_responses.py           | 53 +++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index ff95b12a7..01dfb8d61 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -1026,7 +1026,9 @@ def openai_messages_to_messages(
     return converted_messages
 
 
-def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]):
+def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
+    if content is None:
+        return ""
     if isinstance(content, str):
         return content
     elif isinstance(content, list):
diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index 26eac527b..b0b123c45 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -221,3 +221,56 @@ def test_list_response_input_items_with_limit_and_order(openai_client, client_wi
         assert hasattr(item, "type")
         assert item.type == "message"
         assert item.role in ["user", "assistant"]
+
+
+@pytest.mark.skip(reason="Tool calling is not reliable.")
+def test_function_call_output_response(openai_client, client_with_models, text_model_id):
+    """Test handling of function call outputs in responses."""
+    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
+
+    client = openai_client
+
+    # First create a response that triggers a function call
+    response = client.responses.create(
+        model=text_model_id,
+        input=[
+            {
+                "role": "user",
+                "content": "what's the weather in tokyo? You MUST call the `get_weather` function to find out.",
+            }
+        ],
+        tools=[
+            {
+                "type": "function",
+                "name": "get_weather",
+                "description": "Get the weather in a given city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city to get the weather for"},
+                    },
+                },
+            }
+        ],
+        stream=False,
+    )
+
+    # Verify we got a function call
+    assert response.output[0].type == "function_call"
+    call_id = response.output[0].call_id
+
+    # Now send the function call output as a follow-up
+    response2 = client.responses.create(
+        model=text_model_id,
+        input=[{"type": "function_call_output", "call_id": call_id, "output": "sunny and warm"}],
+        previous_response_id=response.id,
+        stream=False,
+    )
+
+    # Verify the second response processed successfully
+    assert response2.id is not None
+    assert response2.output[0].type == "message"
+    assert (
+        "sunny" in response2.output[0].content[0].text.lower() or "warm" in response2.output[0].content[0].text.lower()
+    )

From 2d9fd041eb7c5d8d163d1f97e6e9942b6a366af4 Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 25 Jun 2025 22:29:33 -0400
Subject: [PATCH 4/8] fix: annotations list and web_search_preview in
 Responses (#2520)

# What does this PR do?

These are a couple of fixes to get an example LangChain app working with
our OpenAI Responses API implementation.
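To make the two failure modes concrete, here is a minimal sketch of the
same request pattern using the plain `openai` client instead of LangChain
(the base URL, api key, and model id are illustrative and mirror the
LangChain example at the end of this description):

```python
from openai import OpenAI

# Plain OpenAI client pointed at a locally running Llama Stack server
# (base URL, api_key, and model id are illustrative).
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")

response = client.responses.create(
    model="ollama/meta-llama/Llama-3.2-3B-Instruct",
    input="What was a positive news story from today?",
    # "web_search_preview" was previously rejected; only "web_search" and
    # "web_search_preview_2025_03_11" were accepted.
    tools=[{"type": "web_search_preview"}],
)

# Output text content parts now always carry an `annotations` list (empty
# for now), which clients like LangChain expect to be present.
for item in response.output:
    if item.type == "message":
        for part in item.content:
            if part.type == "output_text":
                print(part.text, part.annotations)
```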
The Responses API spec requires an annotations array in `output[*].content[*].annotations` and we were not providing one. So, this adds that as an empty list, even though we don't do anything to populate it yet. This prevents an error from client libraries like Langchain that expect this field to always exist, even if an empty list. The other fix is `web_search_preview` is a valid name for the web search tool in the Responses API, but we only responded to `web_search` or `web_search_preview_2025_03_11`. ## Test Plan The existing Responses unit tests were expanded to test these cases, via: ``` pytest -sv tests/unit/providers/agents/meta_reference/test_openai_responses.py ``` The existing test_openai_responses.py integration tests still pass with this change, tested as below with Fireworks: ``` uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv tests/integration/agents/test_openai_responses.py \ --text-model accounts/fireworks/models/llama4-scout-instruct-basic ``` Lastly, this example LangChain app now works with Llama stack (tested with Ollama in the starter template in this case). This LangChain code is using the example snippets for using Responses API at https://python.langchain.com/docs/integrations/chat/openai/#responses-api ```python from langchain_openai import ChatOpenAI llm = ChatOpenAI( base_url="http://localhost:8321/v1/openai/v1", api_key="fake", model="ollama/meta-llama/Llama-3.2-3B-Instruct", ) tool = {"type": "web_search_preview"} llm_with_tools = llm.bind_tools([tool]) response = llm_with_tools.invoke("What was a positive news story from today?") print(response.content) ``` Signed-off-by: Ben Browning --- docs/_static/llama-stack-spec.html | 154 +++++++++++++++++- docs/_static/llama-stack-spec.yaml | 107 ++++++++++++ llama_stack/apis/agents/openai_responses.py | 54 +++++- .../agents/meta_reference/openai_responses.py | 3 +- .../meta_reference/test_openai_responses.py | 73 +++++---- 5 files changed, 355 insertions(+), 36 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 801e8dc33..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": 
"string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index b736cd904..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - 
filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match 
type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") # TODO: add user_location diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..cf3293ed0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,6 +42,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.inference.inference import ( @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..7772dd2cc 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,6 +27,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.inference.inference import ( OpenAIAssistantMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" 
model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+        assert first_call.kwargs["tools"] is not None
+        assert first_call.kwargs["temperature"] == 0.1
 
-    openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
-    openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
-        tool_name="web_search",
-        kwargs={"query": "What is the capital of Ireland?"},
-    )
+        second_call = mock_inference_api.openai_chat_completion.call_args_list[1]
+        assert second_call.kwargs["messages"][-1].content == "Dublin"
+        assert second_call.kwargs["temperature"] == 0.1
 
-    openai_responses_impl.responses_store.store_response_object.assert_called_once()
+        openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search")
+        openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with(
+            tool_name="web_search",
+            kwargs={"query": "What is the capital of Ireland?"},
+        )
 
-    # Check that we got the content from our mocked tool execution result
-    assert len(result.output) >= 1
-    assert isinstance(result.output[1], OpenAIResponseMessage)
-    assert result.output[1].content[0].text == "Dublin"
+        openai_responses_impl.responses_store.store_response_object.assert_called_once()
+
+        # Check that we got the content from our mocked tool execution result
+        assert len(result.output) >= 1
+        assert isinstance(result.output[1], OpenAIResponseMessage)
+        assert result.output[1].content[0].text == "Dublin"
+        assert result.output[1].content[0].annotations == []
 
 
 @pytest.mark.asyncio

From ac5fd57387f8fded5e6129789e2d09d01f6d67ba Mon Sep 17 00:00:00 2001
From: Sébastien Han
Date: Thu, 26 Jun 2025 04:31:05 +0200
Subject: [PATCH 5/8] chore: remove nested imports (#2515)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

* Given that our API packages use "import *" in `__init__.py` we don't
  need to do `from llama_stack.apis.models.models` but simply
  `from llama_stack.apis.models`. The decision to use `import *` is
  debatable and should probably be revisited at one point.
* Remove unneeded Ruff F401 rule
* Consolidate Ruff F403 rule in the pyproject

Signed-off-by: Sébastien Han
---
 llama_stack/apis/agents/__init__.py           |  2 +-
 llama_stack/apis/batch_inference/__init__.py  |  2 +-
 llama_stack/apis/benchmarks/__init__.py       |  2 +-
 llama_stack/apis/datasetio/__init__.py        |  2 +-
 llama_stack/apis/datasets/__init__.py         |  2 +-
 llama_stack/apis/eval/__init__.py             |  2 +-
 llama_stack/apis/files/__init__.py            |  2 +-
 llama_stack/apis/inference/__init__.py        |  2 +-
 llama_stack/apis/inference/inference.py       |  2 +-
 llama_stack/apis/inspect/__init__.py          |  2 +-
 llama_stack/apis/models/__init__.py           |  2 +-
 llama_stack/apis/post_training/__init__.py    |  2 +-
 llama_stack/apis/providers/__init__.py        |  2 +-
 llama_stack/apis/safety/__init__.py           |  2 +-
 llama_stack/apis/scoring/__init__.py          |  2 +-
 .../apis/scoring_functions/__init__.py        |  2 +-
 llama_stack/apis/shields/__init__.py          |  2 +-
 .../synthetic_data_generation/__init__.py     |  2 +-
 llama_stack/apis/telemetry/__init__.py        |  2 +-
 llama_stack/apis/tools/__init__.py            |  4 ++--
 llama_stack/apis/vector_dbs/__init__.py       |  2 +-
 llama_stack/apis/vector_io/__init__.py        |  2 +-
 llama_stack/distribution/routers/inference.py | 14 +++++-------
 llama_stack/distribution/routers/vector_io.py | 10 ++++-----
 llama_stack/distribution/ui/modules/api.py    |  2 +-
 llama_stack/log.py                            |  2 +-
 .../agents/meta_reference/openai_responses.py |  4 ++--
 .../inline/eval/meta_reference/eval.py        |  2 +-
 .../inline/scoring/llm_as_judge/scoring.py    |  2 +-
 .../scoring_fn/llm_as_judge_scoring_fn.py     |  2 +-
 .../providers/inline/vector_io/faiss/faiss.py |  3 +--
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |  6 ++---
 .../remote/datasetio/nvidia/datasetio.py      |  2 +-
 .../remote/inference/anthropic/models.py      |  2 +-
 .../remote/inference/fireworks/fireworks.py   | 14 +++++-------
 .../remote/inference/fireworks/models.py      |  2 +-
 .../remote/inference/gemini/models.py         |  2 +-
 .../providers/remote/inference/groq/groq.py   |  2 +-
 .../remote/inference/nvidia/nvidia.py         | 12 +++++-----
 .../remote/inference/ollama/models.py         |  2 +-
 .../remote/inference/ollama/ollama.py         | 16 ++++++--------
 .../remote/inference/openai/models.py         |  2 +-
 .../remote/inference/openai/openai.py         |  2 +-
 .../inference/passthrough/passthrough.py      | 12 +++++-----
 .../remote/inference/runpod/runpod.py         |  2 +-
 .../remote/inference/together/models.py       |  2 +-
 .../remote/inference/together/together.py     | 12 +++++-----
 .../providers/remote/inference/vllm/vllm.py   | 10 ++++-----
 .../remote/inference/watsonx/watsonx.py       | 14 +++++-------
 .../utils/inference/litellm_openai_mixin.py   | 18 +++++++--------
 .../utils/inference/model_registry.py         |  2 +-
 .../utils/inference/openai_compat.py          | 22 +++++++++----------
 .../utils/memory/openai_vector_store_mixin.py |  3 +--
 .../providers/utils/telemetry/tracing.py      |  2 +-
 llama_stack/templates/cerebras/cerebras.py    |  2 +-
 llama_stack/templates/ci-tests/ci_tests.py    |  2 +-
 llama_stack/templates/dell/dell.py            |  2 +-
 llama_stack/templates/fireworks/fireworks.py  |  2 +-
 llama_stack/templates/groq/groq.py            |  2 +-
 .../templates/hf-endpoint/hf_endpoint.py      |  2 +-
 .../templates/hf-serverless/hf_serverless.py  |  2 +-
 llama_stack/templates/llama_api/llama_api.py  |  2 +-
 .../meta-reference-gpu/meta_reference.py      |  2 +-
 llama_stack/templates/ollama/ollama.py        |  2 +-
 .../open-benchmark/open_benchmark.py          |  2 +-
 .../templates/passthrough/passthrough.py      |  2 +-
 .../templates/postgres-demo/postgres_demo.py  |  2 +-
 llama_stack/templates/remote-vllm/vllm.py     |  2 +-
 llama_stack/templates/sambanova/sambanova.py  |  2 +-
llama_stack/templates/starter/starter.py | 2 +- llama_stack/templates/template.py | 2 +- llama_stack/templates/tgi/tgi.py | 2 +- llama_stack/templates/together/together.py | 2 +- llama_stack/templates/vllm-gpu/vllm.py | 2 +- llama_stack/templates/watsonx/watsonx.py | 2 +- pyproject.toml | 3 +++ .../routers/test_routing_tables.py | 2 +- .../meta_reference/fixtures/__init__.py | 2 +- .../meta_reference/test_openai_responses.py | 2 +- tests/unit/providers/nvidia/test_safety.py | 2 +- .../utils/inference/test_openai_compat.py | 2 +- .../providers/utils/test_model_registry.py | 2 +- 82 files changed, 143 insertions(+), 164 deletions(-) diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git 
a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index cf3293ed0..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -45,7 +45,7 @@ from llama_stack.apis.agents.openai_responses import ( WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -584,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from 
llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 3b3c5f486..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -15,8 +15,8 @@ import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -64,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, "namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 2f51920b5..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -32,15 +32,6 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, - ResponseFormat, - SamplingParams, - TextTruncation, - ToolChoice, - ToolConfig, - ToolDefinition, - ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, @@ -48,6 +39,13 @@ from llama_stack.apis.inference.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, OpenAIResponseFormatParam, + ResponseFormat, + SamplingParams, + TextTruncation, + ToolChoice, + ToolConfig, + ToolDefinition, + ToolPromptFormat, ) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index 
f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index ae04f206a..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- 
a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 01dfb8d61..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, 
OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..4ae68ee1d 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -180,7 +180,7 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..723cc44a3 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..f0738ae5b 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..a1a2aa2b7 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..38df6a4be 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index ec01d08e9..8e111e80a 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..97624fade 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,6 +204,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 7772dd2cc..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -29,7 +29,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTextFormat, WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from 
llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry From 36d70637b98441da9e7c7035dc5f35048bcd9e4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 04:31:26 +0200 Subject: [PATCH 6/8] fix: finish conversion to StrEnum (#2514) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? We still had a few enums declared to behave like both a string and an enum. Let's use StrEnum for those. Signed-off-by: Sébastien Han --- llama_stack/apis/datasets/datasets.py | 4 ++-- llama_stack/apis/files/files.py | 4 ++-- llama_stack/apis/models/models.py | 4 ++-- llama_stack/distribution/access_control/datatypes.py | 4 ++-- llama_stack/distribution/datatypes.py | 6 +++--- llama_stack/models/llama/datatypes.py | 4 ++-- llama_stack/providers/datatypes.py | 4 ++-- .../providers/inline/telemetry/meta_reference/config.py | 4 ++-- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index e3de3d5cb..8bf7a48d0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import Enum, StrEnum from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -class DatasetPurpose(str, Enum): +class DatasetPurpose(StrEnum): """ Purpose of the dataset. Each purpose has a required input data schema. diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 4dfeed448..a72dcd8d4 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from enum import Enum +from enum import StrEnum from typing import Annotated, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod # OpenAI Files API Models -class OpenAIFilePurpose(str, Enum): +class OpenAIFilePurpose(StrEnum): """ Valid purpose values for OpenAI Files API. """ diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 3d90a92a0..36da97e62 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -22,7 +22,7 @@ class CommonModelFields(BaseModel): @json_schema_type -class ModelType(str, Enum): +class ModelType(StrEnum): llm = "llm" embedding = "embedding" diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/distribution/access_control/datatypes.py index bc5ed6645..c833ed51b 100644 --- a/llama_stack/distribution/access_control/datatypes.py +++ b/llama_stack/distribution/access_control/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Self from pydantic import BaseModel, model_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator from .conditions import parse_conditions -class Action(str, Enum): +class Action(StrEnum): CREATE = "create" READ = "read" UPDATE = "update" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index abc3f0065..5324e4c29 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from pathlib import Path from typing import Annotated, Any @@ -159,7 +159,7 @@ class LoggingConfig(BaseModel): ) -class AuthProviderType(str, Enum): +class AuthProviderType(StrEnum): """Supported authentication provider types.""" OAUTH2_TOKEN = "oauth2_token" @@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception): pass -class QuotaPeriod(str, Enum): +class QuotaPeriod(StrEnum): DAY = "day" diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index f9f094c3d..7f1ebed55 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -from enum import Enum +from enum import Enum, StrEnum from io import BytesIO from typing import Annotated, Any, Literal @@ -171,7 +171,7 @@ class GenerationResult(BaseModel): ignore_token: bool -class QuantizationMode(str, Enum): +class QuantizationMode(StrEnum): none = "none" fp8_mixed = "fp8_mixed" int4_mixed = "int4_mixed" diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 60b05545b..221ed9027 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from enum import Enum +from enum import StrEnum from typing import Any, Protocol from urllib.parse import urlparse @@ -225,7 +225,7 @@ def remote_provider_spec( ) -class HealthStatus(str, Enum): +class HealthStatus(StrEnum): OK = "OK" ERROR = "Error" NOT_IMPLEMENTED = "Not Implemented" diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 93509040c..2baa204c9 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, Field, field_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR -class TelemetrySink(str, Enum): +class TelemetrySink(StrEnum): OTEL_TRACE = "otel_trace" OTEL_METRIC = "otel_metric" SQLITE = "sqlite" From 43c1f39bd6fc9450f10dadfc23c27b4eaff37233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 04:50:08 +0200 Subject: [PATCH 7/8] refactor(env)!: enhanced environment variable substitution (#2490) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? This commit significantly improves the environment variable substitution functionality in Llama Stack configuration files: * The version field in configuration files has been changed from string to integer type for better type consistency across build and run configurations. * The environment variable substitution system for ${env.FOO:} was fixed and now properly returns an error. * The environment variable substitution system for ${env.FOO+} now returns None instead of an empty string, which better matches type annotations in config fields. * The system includes automatic type conversion for boolean, integer, and float values. * The error messages have been enhanced to provide clearer guidance when environment variables are missing, including suggestions for using default values or conditional syntax. * Comprehensive documentation has been added to the configuration guide explaining all supported syntax patterns, best practices, and runtime override capabilities. * Multiple provider configurations have been updated to use the new conditional syntax for optional API keys, making the system more flexible for different deployment scenarios. The telemetry configuration has been improved to properly handle optional endpoints with appropriate validation, ensuring that required endpoints are specified when their corresponding sinks are enabled. * There were many instances of ${env.NVIDIA_API_KEY:} that should have caused the code to fail. However, due to a bug, the distro server was still being started, and early validation wasn’t triggered. As a result, failures were likely being handled downstream by the providers. I’ve maintained similar behavior by using ${env.NVIDIA_API_KEY:+}, though I believe this is incorrect for many configurations. I’ll leave it to each provider to correct it as needed. * Environment variable substitution now uses the same syntax as Bash parameter expansion.
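To make the new rules concrete, here is a minimal, self-contained sketch of the bash-style expansion semantics described above. It mirrors the `replace_env_vars` and `_convert_string_to_proper_type` changes to `llama_stack/distribution/stack.py` shown in the diff below, but the `expand` and `_coerce` helpers and their error messages are illustrative simplifications, not the actual implementation:

```python
import os
import re
from typing import Any

# Matches ${env.VAR}, ${env.VAR:=default}, and ${env.VAR:+value_if_set}
_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(?::([=+])([^}]*))?\}")


def _coerce(value: str) -> Any:
    """Best-effort typing: '' -> None, 'true'/'false' -> bool, numeric strings -> int/float."""
    if value == "":
        return None
    if value.lower() in ("true", "false"):
        return value.lower() == "true"
    for cast in (int, float):
        try:
            return cast(value)
        except ValueError:
            pass
    return value


def expand(text: str) -> Any:
    def repl(match: re.Match) -> str:
        var, op, rest = match.group(1), match.group(2), match.group(3)
        env_value = os.environ.get(var)
        if op == "=":  # ${env.VAR:=default} -> fall back to the default when unset
            if env_value:
                return env_value
            if rest == "":  # empty defaults are rejected
                raise ValueError(f"environment variable '{var}' is not set and has an empty default")
            return rest
        if op == "+":  # ${env.VAR:+value} -> emit the value only when the variable is set
            return rest if env_value else ""
        if env_value is None:  # bare ${env.VAR} -> required
            raise ValueError(f"environment variable '{var}' is not set")
        return env_value

    return _coerce(_PATTERN.sub(repl, text))


os.environ.pop("OLLAMA_URL", None)
print(expand("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434
print(expand("${env.PORT:=8321}"))   # 8321, coerced to an int
print(expand("${env.DEBUG:+true}"))  # None, because DEBUG is unset
```

Note how the type coercion runs after substitution, which is why `${env.PORT:=8321}` yields an integer and an unset conditional collapses to `None`.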
Signed-off-by: Sébastien Han --- docs/source/distributions/configuration.md | 105 ++++++++++++++++-- .../distributions/k8s/stack-configmap.yaml | 60 +++++----- .../distributions/k8s/stack_run_config.yaml | 72 ++++++------ llama_stack/distribution/datatypes.py | 8 +- llama_stack/distribution/stack.py | 54 +++++++-- .../providers/inline/files/localfs/config.py | 2 +- .../inline/inference/meta_reference/config.py | 10 +- .../providers/inline/inference/vllm/config.py | 12 +- .../inline/scoring/braintrust/config.py | 2 +- .../inline/telemetry/meta_reference/config.py | 14 +-- .../telemetry/meta_reference/telemetry.py | 4 + .../inline/vector_io/qdrant/config.py | 2 +- .../inline/vector_io/sqlite_vec/config.py | 2 +- .../remote/datasetio/nvidia/config.py | 8 +- .../providers/remote/eval/nvidia/config.py | 2 +- .../remote/inference/nvidia/config.py | 6 +- .../remote/inference/ollama/config.py | 2 +- .../remote/inference/runpod/config.py | 4 +- .../remote/inference/together/config.py | 2 +- .../providers/remote/inference/vllm/config.py | 9 +- .../remote/inference/watsonx/config.py | 6 +- .../remote/post_training/nvidia/config.py | 8 +- .../providers/remote/safety/nvidia/config.py | 4 +- .../tool_runtime/brave_search/config.py | 2 +- .../tool_runtime/tavily_search/config.py | 2 +- .../tool_runtime/wolfram_alpha/config.py | 2 +- .../remote/vector_io/pgvector/config.py | 4 +- llama_stack/providers/utils/kvstore/config.py | 24 ++-- .../providers/utils/sqlstore/sqlstore.py | 12 +- llama_stack/templates/bedrock/build.yaml | 2 +- llama_stack/templates/bedrock/run.yaml | 30 ++--- llama_stack/templates/cerebras/build.yaml | 2 +- llama_stack/templates/cerebras/run.yaml | 30 ++--- llama_stack/templates/ci-tests/build.yaml | 2 +- llama_stack/templates/ci-tests/run.yaml | 30 ++--- llama_stack/templates/dell/build.yaml | 2 +- .../templates/dell/run-with-safety.yaml | 28 ++--- llama_stack/templates/dell/run.yaml | 28 ++--- llama_stack/templates/fireworks/build.yaml | 2 +- .../templates/fireworks/run-with-safety.yaml | 36 +++--- llama_stack/templates/fireworks/run.yaml | 36 +++--- llama_stack/templates/groq/build.yaml | 2 +- llama_stack/templates/groq/run.yaml | 30 ++--- llama_stack/templates/hf-endpoint/build.yaml | 2 +- .../hf-endpoint/run-with-safety.yaml | 30 ++--- llama_stack/templates/hf-endpoint/run.yaml | 30 ++--- .../templates/hf-serverless/build.yaml | 2 +- .../hf-serverless/run-with-safety.yaml | 30 ++--- llama_stack/templates/hf-serverless/run.yaml | 30 ++--- llama_stack/templates/llama_api/build.yaml | 2 +- llama_stack/templates/llama_api/llama_api.py | 14 +-- llama_stack/templates/llama_api/run.yaml | 48 ++++---- .../templates/meta-reference-gpu/build.yaml | 2 +- .../meta-reference-gpu/run-with-safety.yaml | 46 ++++---- .../templates/meta-reference-gpu/run.yaml | 38 +++---- llama_stack/templates/nvidia/build.yaml | 2 +- .../templates/nvidia/run-with-safety.yaml | 52 ++++----- llama_stack/templates/nvidia/run.yaml | 46 ++++---- llama_stack/templates/ollama/build.yaml | 2 +- .../templates/ollama/run-with-safety.yaml | 38 +++---- llama_stack/templates/ollama/run.yaml | 38 +++---- .../templates/open-benchmark/build.yaml | 2 +- .../open-benchmark/open_benchmark.py | 12 +- llama_stack/templates/open-benchmark/run.yaml | 48 ++++---- llama_stack/templates/passthrough/build.yaml | 2 +- .../passthrough/run-with-safety.yaml | 32 +++--- llama_stack/templates/passthrough/run.yaml | 32 +++--- .../templates/postgres-demo/build.yaml | 2 +- .../templates/postgres-demo/postgres_demo.py | 10 +- 
llama_stack/templates/postgres-demo/run.yaml | 64 +++++------ llama_stack/templates/remote-vllm/build.yaml | 2 +- .../remote-vllm/run-with-safety.yaml | 44 ++++---- llama_stack/templates/remote-vllm/run.yaml | 38 +++---- llama_stack/templates/sambanova/build.yaml | 2 +- llama_stack/templates/sambanova/run.yaml | 40 +++---- llama_stack/templates/sambanova/sambanova.py | 12 +- llama_stack/templates/starter/build.yaml | 2 +- llama_stack/templates/starter/run.yaml | 90 +++++++-------- llama_stack/templates/starter/starter.py | 38 +++---- llama_stack/templates/tgi/build.yaml | 2 +- .../templates/tgi/run-with-safety.yaml | 30 ++--- llama_stack/templates/tgi/run.yaml | 30 ++--- llama_stack/templates/together/build.yaml | 2 +- .../templates/together/run-with-safety.yaml | 34 +++--- llama_stack/templates/together/run.yaml | 34 +++--- llama_stack/templates/vllm-gpu/build.yaml | 2 +- llama_stack/templates/vllm-gpu/run.yaml | 42 +++---- llama_stack/templates/watsonx/build.yaml | 2 +- llama_stack/templates/watsonx/run.yaml | 36 +++--- .../llama-stack-provider-ollama/run.yaml | 72 ++++++++---- tests/unit/server/test_replace_env_vars.py | 31 +++--- 91 files changed, 1053 insertions(+), 892 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 4bc9b37e4..1b50ee712 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -18,7 +18,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,7 +38,7 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -46,7 +46,7 @@ providers: metadata_store: namespace: null type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -85,7 +85,7 @@ providers: # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: - A _provider instance_ is identified with an (id, type, configuration) triplet. @@ -94,6 +94,95 @@ A few things to note: - The configuration dictionary is provider-specific. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +### Environment Variable Substitution + +Llama Stack supports environment variable substitution in configuration values using the +`${env.VARIABLE_NAME}` syntax. 
This allows you to externalize configuration values and provide +different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) +and follows similar patterns. + +#### Basic Syntax + +The basic syntax for environment variable substitution is: + +```yaml +config: + api_key: ${env.API_KEY} + url: ${env.SERVICE_URL} +``` + +If the environment variable is not set, the server will raise an error during startup. + +#### Default Values + +You can provide default values using the `:=` operator: + +```yaml +config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + port: ${env.PORT:=8321} + timeout: ${env.TIMEOUT:=60} +``` + +If the environment variable is not set, the default value after `:=` is used; for example, `OLLAMA_URL` above falls back to `http://localhost:11434`. +Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set. + +#### Conditional Values + +You can use the `:+` operator to provide a value only when the environment variable is set: + +```yaml +config: + # Only include this field if ENVIRONMENT is set + environment: ${env.ENVIRONMENT:+production} +``` + +If the environment variable is set, the value after `:+` will be used. If it's not set, the field +will be omitted with a `None` value. +An empty conditional value such as `${env.ENVIRONMENT:+}` is also supported; it means that the field will be omitted if the environment +variable is not set. This can be used to make a field optional and then enable it at runtime when desired. + +#### Examples + +Here are some common patterns: + +```yaml +# Required environment variable (will error if not set) +api_key: ${env.OPENAI_API_KEY} + +# Optional with default +base_url: ${env.API_BASE_URL:=https://api.openai.com/v1} + +# Conditional field +debug_mode: ${env.DEBUG:+true} + +# Optional field that becomes None if not set +optional_token: ${env.OPTIONAL_TOKEN:+} +``` + +#### Runtime Override + +You can override environment variables at runtime when starting the server: + +```bash +# Override specific environment variables +llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com + +# Or set them in your shell +export API_KEY=sk-123 +export BASE_URL=https://custom-api.com +llama stack run --config run.yaml +``` + +#### Type Safety + +The environment variable substitution system is type-safe: + +- String values remain strings +- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None` +- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer) +- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean) + ## Resources Finally, let's look at the `models` section: @@ -152,7 +241,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" - token: "${env.TOKEN:}" + token: "${env.TOKEN:+}" key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" @@ -396,12 +485,12 @@ providers: - provider_id: vllm-0 provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000} + url: ${env.VLLM_URL:=http://localhost:8000} # this vLLM server serves the llama-guard model (e.g., llama-guard:3b) - provider_id: vllm-1 provider_type: remote::vllm config: - url: ${env.SAFETY_VLLM_URL:http://localhost:8001} + url: ${env.SAFETY_VLLM_URL:=http://localhost:8001} ... 
models: - metadata: {} diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index fa7bacd8f..0a08bca03 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -15,10 +15,10 @@ data: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: @@ -30,10 +30,10 @@ data: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,34 +45,34 @@ data: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} + service_name: ${env.OTEL_SERVICE_NAME:+} sinks: ${env.TELEMETRY_SINKS:console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -82,19 +82,19 @@ data: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: 
${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -106,11 +106,11 @@ data: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: - - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index 8e2773dd1..5ac08134c 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -12,25 +12,25 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: - url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,34 +42,34 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + service_name: ${env.OTEL_SERVICE_NAME:+console} + sinks: ${env.TELEMETRY_SINKS:+console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: 
rag-runtime provider_type: inline::rag-runtime @@ -79,19 +79,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -103,11 +103,11 @@ models: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: -- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} +- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 5324e4c29..e07da001e 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig -LLAMA_STACK_BUILD_CONFIG_VERSION = "2" -LLAMA_STACK_RUN_CONFIG_VERSION = "2" +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 RoutingKey = str | list[str] @@ -229,7 +229,7 @@ class ServerConfig(BaseModel): class StackRunConfig(BaseModel): - version: str = LLAMA_STACK_RUN_CONFIG_VERSION + version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( ..., @@ -300,7 +300,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): - version: str = LLAMA_STACK_BUILD_CONFIG_VERSION + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index b33b0d3f7..c86880669 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -127,7 +127,12 @@ class EnvVarError(Exception): def __init__(self, var_name: str, path: str = ""): self.var_name = var_name self.path = path - super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}") + super().__init__( + f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. " + f"Use ${{env.{var_name}:=default_value}} to provide a default value, " + f"${{env.{var_name}:+value_if_set}} to make the field conditional, " + f"or ensure the environment variable is set." 
+ ) def replace_env_vars(config: Any, path: str = "") -> Any: @@ -150,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return result elif isinstance(config, str): - # Updated pattern to support both default values (:) and conditional values (+) - pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}" + # Pattern supports bash-like syntax: := for default and :+ for conditional, with an optional value + pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}" - def get_env_var(match): + def get_env_var(match: re.Match): env_var = match.group(1) - operator = match.group(2) # ':' for default, '+' for conditional + operator = match.group(2) # '=' for default, '+' for conditional value_expr = match.group(3) env_value = os.environ.get(env_var) - if operator == ":": # Default value syntax: ${env.FOO:default} + if operator == "=": # Default value syntax: ${env.FOO:=default} if not env_value: - if value_expr is None: + # value_expr returns empty string (not None) when not matched + # This means ${env.FOO:=} is an error + if value_expr == "": raise EnvVarError(env_var, path) else: value = value_expr else: value = env_value - elif operator == "+": # Conditional value syntax: ${env.FOO+value_if_set} + elif operator == "+": # Conditional value syntax: ${env.FOO:+value_if_set} if env_value: value = value_expr else: @@ -183,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any: return os.path.expanduser(value) try: - return re.sub(pattern, get_env_var, config) + result = re.sub(pattern, get_env_var, config) + return _convert_string_to_proper_type(result) except EnvVarError as e: raise EnvVarError(e.var_name, e.path) from None return config +def _convert_string_to_proper_type(value: str) -> Any: + # This might be tricky depending on what the config type is, if 'str | None' we are + # good, if 'str' we need to keep the empty string... 'str | None' is more common and + # providers config should be typed this way. 
+ # TODO: we could try to load the config class and see if the config has a field with type 'str | None' + # and then convert the empty string to None or not + if value == "": + return None + + lowered = value.lower() + if lowered == "true": + return True + elif lowered == "false": + return False + + try: + return int(value) + except ValueError: + pass + + try: + return float(value) + except ValueError: + pass + + return value + + def validate_env_pair(env_pair: str) -> tuple[str, str]: """Validate and split an environment variable key-value pair.""" try: diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 757a70742..6c767af8f 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", + "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", "metadata_store": SqliteSqlStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="files_metadata.db", diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 7bc961443..9556b026a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel): def sample_run_config( cls, model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", - quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", **kwargs, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index ce8743c74..660ef206b 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -44,10 +44,10 @@ class VLLMConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", - "max_tokens": "${env.MAX_TOKENS:4096}", - "max_model_len": "${env.MAX_MODEL_LEN:4096}", - "max_num_seqs": "${env.MAX_NUM_SEQS:4}", - "enforce_eager": "${env.ENFORCE_EAGER:False}", - "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", + "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}", + "max_tokens": "${env.MAX_TOKENS:=4096}", + "max_model_len": "${env.MAX_MODEL_LEN:=4096}", + "max_num_seqs": "${env.MAX_NUM_SEQS:=4}", + "enforce_eager": "${env.ENFORCE_EAGER:=False}", + "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}", } diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index 4a80f1e4f..f44d27f96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ 
b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "openai_api_key": "${env.OPENAI_API_KEY:}", + "openai_api_key": "${env.OPENAI_API_KEY:+}", } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 2baa204c9..50dd8a788 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -20,12 +20,12 @@ class TelemetrySink(StrEnum): class TelemetryConfig(BaseModel): - otel_trace_endpoint: str = Field( - default="http://localhost:4318/v1/traces", + otel_trace_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for traces", ) - otel_metric_endpoint: str = Field( - default="http://localhost:4318/v1/metrics", + otel_metric_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:\u200b}", - "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", + "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 0f6cf8619..98f5bf5a1 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider if TelemetrySink.OTEL_TRACE in self.config.sinks: + if self.config.otel_trace_endpoint is None: + raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled") span_exporter = OTLPSpanExporter( endpoint=self.config.otel_trace_endpoint, ) span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: + if self.config.otel_metric_endpoint is None: + raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled") metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( endpoint=self.config.otel_metric_endpoint, diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 283724b41..7cc91d918 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py 
index cb806cb39..4c57f4aba 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py index e616ce25c..0f5ea22e9 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index 5c8f9ff76..7a1c04304 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}", } diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 4c449edc2..6369928bb 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", + "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", } diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 37b827f4f..b2cc4d8a7 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -17,7 +17,7 @@ class OllamaImplConfig(BaseModel): @classmethod def sample_run_config( - cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs ) -> dict[str, Any]: return { "url": url, diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index e3913dc35..ff32a971c 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ 
b/llama_stack/providers/remote/inference/runpod/config.py @@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:}", - "api_token": "${env.RUNPOD_API_TOKEN:}", + "url": "${env.RUNPOD_URL:+}", + "api_token": "${env.RUNPOD_API_TOKEN:+}", } diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 5c7f60519..121e2cae7 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:}", + "api_key": "${env.TOGETHER_API_KEY:+}", } diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 99abddf51..e11efa7f0 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def validate_tls_verify(cls, v): if isinstance(v, str): - # Check if it's a boolean string - if v.lower() in ("true", "false"): - return v.lower() == "true" # Otherwise, treat it as a cert path cert_path = Path(v).expanduser().resolve() if not cert_path.exists(): @@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel): ): return { "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", - "api_token": "${env.VLLM_API_TOKEN:fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:true}", + "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", } diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 5eda9c5c0..9534eceeb 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:}", - "project_id": "${env.WATSONX_PROJECT_ID:}", + "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:+}", + "project_id": "${env.WATSONX_PROJECT_ID:+}", } diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index fa08b6e3f..ea6dff0b5 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}", } diff --git 
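[Editor's note] The vLLM hunk above drops the `"true"`/`"false"` string parsing from `validate_tls_verify`: with `${env.VLLM_TLS_VERIFY:=true}` now resolving through the substitution machinery, booleans arrive as booleans, and any remaining string is treated strictly as a certificate path. A condensed reconstruction of the resulting validator, with the surrounding class details assumed:

```python
# Condensed reconstruction of the validator after this hunk; error
# messages and class shape are assumptions, not copied from the patch.
from pathlib import Path
from pydantic import BaseModel, field_validator

class TLSConfig(BaseModel):  # hypothetical stand-in for the adapter config
    tls_verify: bool | str = True

    @field_validator("tls_verify")
    @classmethod
    def validate_tls_verify(cls, v):
        if isinstance(v, str):
            # Any string is now a cert path; booleans pass through untouched.
            cert_path = Path(v).expanduser().resolve()
            if not cert_path.exists():
                raise ValueError(f"TLS certificate file does not exist: {v}")
            if not cert_path.is_file():
                raise ValueError(f"TLS certificate path is not a file: {v}")
        return v
```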
a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index ddf5a3a0b..1c618f4f4 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", } diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 37ba21304..93b97a1b2 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "api_key": "${env.BRAVE_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index c9b18d30d..5bdd27807 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index aefc86bd6..b5b10e371 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", + "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}", } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 04b92a2e4..041e864ca 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel): @classmethod def sample_run_config( cls, - host: str = "${env.PGVECTOR_HOST:localhost}", - port: int = "${env.PGVECTOR_PORT:5432}", + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index e966e13ba..a45ff4ce8 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig): return { "type": "redis", "namespace": None, - "host": "${env.REDIS_HOST:localhost}", - "port": "${env.REDIS_PORT:6379}", 
+ "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", } @@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig): return { "type": "sqlite", "namespace": None, - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig): return { "type": "postgres", "namespace": None, - "host": "${env.POSTGRES_HOST:localhost}", - "port": "${env.POSTGRES_PORT:5432}", - "db": "${env.POSTGRES_DB:llamastack}", - "user": "${env.POSTGRES_USER:llamastack}", - "password": "${env.POSTGRES_PASSWORD:llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", } @classmethod @@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig): return { "type": "mongodb", "namespace": None, - "host": "${env.MONGODB_HOST:localhost}", - "port": "${env.MONGODB_PORT:5432}", + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", "user": "${env.MONGODB_USER}", "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", } diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index edc7672a3..d558a2a26 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): return cls( type="sqlite", - db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, ) @property @@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, **kwargs): return cls( type="postgres", - host="${env.POSTGRES_HOST:localhost}", - port="${env.POSTGRES_PORT:5432}", - db="${env.POSTGRES_DB:llamastack}", - user="${env.POSTGRES_USER:llamastack}", - password="${env.POSTGRES_PASSWORD:llamastack}", + host="${env.POSTGRES_HOST:=localhost}", + port="${env.POSTGRES_PORT:=5432}", + db="${env.POSTGRES_DB:=llamastack}", + user="${env.POSTGRES_USER:=llamastack}", + password="${env.POSTGRES_PASSWORD:=llamastack}", ) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 97a06f77a..1a2c883fa 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use AWS Bedrock for running LLM inference and safety providers: diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 8033b2086..61bc83f02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: bedrock apis: - agents @@ -22,7 +22,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock provider_type: remote::bedrock @@ -34,17 +34,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -52,7 +52,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -60,14 +60,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -78,17 +78,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -98,10 +98,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index f26f4ed9b..ecd0ac418 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Cerebras for running LLM inference providers: diff --git a/llama_stack/templates/cerebras/run.yaml 
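[Editor's note] The `version: '2'` → `version: 2` change repeated across every build.yaml and run.yaml swaps a quoted string scalar for a plain integer scalar, presumably so the field parses as an int rather than a str. The difference is easy to confirm with any YAML loader:

```python
# The quoted and unquoted scalars parse to different Python types:
import yaml

print(yaml.safe_load("version: '2'"))  # {'version': '2'}  (str)
print(yaml.safe_load("version: 2"))    # {'version': 2}    (int)
```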
b/llama_stack/templates/cerebras/run.yaml index 490648302..9bd8fcc7c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: cerebras apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,10 +40,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -51,7 +51,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -59,14 +59,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -77,34 +77,34 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b diff --git a/llama_stack/templates/ci-tests/build.yaml 
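[Editor's note] The telemetry blocks in these templates rely on the meta-reference telemetry changes earlier in this patch, where the OTLP endpoints became `str | None` with `None` defaults and the adapter fails fast when a sink is enabled without its endpoint. A minimal sketch of that guard, with the sink enum and wiring trimmed down:

```python
# Minimal sketch of the fail-fast guard added in telemetry.py above;
# the enum and function shape are stand-ins for the adapter's own code.
from enum import StrEnum

class TelemetrySink(StrEnum):
    OTEL_TRACE = "otel_trace"
    OTEL_METRIC = "otel_metric"

def check_endpoints(sinks: list[TelemetrySink],
                    otel_trace_endpoint: str | None,
                    otel_metric_endpoint: str | None) -> None:
    # Endpoints default to None now, so enabling a sink without one is a
    # configuration error rather than a silent localhost:4318 default.
    if TelemetrySink.OTEL_TRACE in sinks and otel_trace_endpoint is None:
        raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
    if TelemetrySink.OTEL_METRIC in sinks and otel_metric_endpoint is None:
        raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
```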
b/llama_stack/templates/ci-tests/build.yaml index 9f4fbbdda..c061d0793 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 92497b0bf..4b7de1c0c 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ci-tests apis: - agents @@ -24,7 +24,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -37,17 +37,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -55,7 +55,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -63,14 +63,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -81,17 +81,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -101,10 +101,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 513df16c1..ff8d58a08 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Dell's distribution of Llama Stack. TGI inference via Dell's custom container diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 22cf1fd24..7f1d0a8c0 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: 
inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index aeca2fc26..310f3cc20 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -36,17 +36,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -54,7 +54,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -62,14 +62,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -80,27 +80,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 53b47da41..eb08c1d43 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Fireworks.AI for running LLM inference providers: diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 302328486..6265f5cae 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -46,17 +46,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -64,7 +64,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -72,14 +72,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -90,30 +90,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -122,10 +122,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index a31ed732b..e10404e92 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - 
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,30 +85,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -117,10 +117,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 819df22f0..7e50a899f 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Groq for running LLM inference providers: diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 7f1912a6f..21c8f7e0f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: groq apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index 8ede83694..9fca9ac22 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8b00f4ba5..2ae1d7685 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 8a9cd5c49..3ec5ae9c1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index d0752db9a..214245116 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index fec64c1df..3871b77e7 100644 --- 
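[Editor's note] In the tool_runtime and scoring hunks of every template, bare `${env.X:}` keys become `${env.X:+}`, so an unset key now resolves to an empty field instead of tripping the placeholder parser. One plausible way a provider might treat the resolved value is sketched below; the actual providers' behavior is not shown in these hunks, so this is an assumption about the downstream handling.

```python
# Hypothetical downstream handling of an optional api_key resolved via
# '${env.BRAVE_SEARCH_API_KEY:+}': empty means "not configured", and the
# provider can degrade gracefully instead of failing at startup.
import logging

logger = logging.getLogger(__name__)

def resolve_search_api_key(raw: str | None) -> str | None:
    if not raw:  # unset env var -> placeholder resolved to "" or None
        logger.warning("BRAVE_SEARCH_API_KEY not set; search tool disabled")
        return None
    return raw
```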
a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} 
model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d4a6286d7..0a5b59400 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml index 857e5f014..44a42594a 100644 --- a/llama_stack/templates/llama_api/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index 723cc44a3..7631781af 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -41,7 +41,7 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: ( "llama-openai-compat", LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), ), ] inference_providers = [] @@ -85,17 +85,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 2185eb4fc..b627ed2f1 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: llama_api apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::llama-openai-compat config: openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:} + api_key: ${env.LLAMA_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -24,19 +24,19 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: 
- provider_id: llama-guard provider_type: inline::llama-guard @@ -49,17 +49,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -67,7 +67,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -75,14 +75,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -93,17 +93,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db models: - metadata: {} model_id: Llama-3.3-70B-Instruct diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 53ad411e3..2119eeddd 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Meta Reference for running LLM inference providers: diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml 
b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index e65445a9e..6b15a1e01 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,10 +31,10 @@ providers: model: ${env.SAFETY_MODEL} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -42,7 +42,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -55,17 +55,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -73,7 +73,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -81,14 +81,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -99,17 +99,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,10 +119,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ef02f14d..1b44a0b3e 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 6bd8a0100..51685b2e3 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index eebfa1066..875fccc9d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,14 +16,14 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: 
${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -31,13 +31,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -45,30 +45,30 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: localfs provider_type: inline::localfs @@ -76,14 +76,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -94,10 +94,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index be0e3f6d1..4477d5244 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,9 +16,9 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,13 +26,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,38 +40,38 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - 
datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,10 +82,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..cbf4281a2 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Ollama server for running LLM inference providers: diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 2e1b7fdcc..5e906a12c 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,15 +85,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -105,12 +105,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -121,13 +121,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 8c2b17ef1..d2b4e3978 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: true vector_io: - provider_id: faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: 
"${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,15 +83,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -103,12 +103,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,13 +119,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 840f1e1db..5f82c5243 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running open benchmarks providers: diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py 
b/llama_stack/templates/open-benchmark/open_benchmark.py index f0738ae5b..b4cfbdb52 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -120,17 +120,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 051ca6f8e..403b0fd3d 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: open-benchmark apis: - agents @@ -33,24 +33,24 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -63,17 +63,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: 
meta-reference provider_type: inline::meta-reference @@ -81,7 +81,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +89,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -107,17 +107,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -127,10 +127,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 46b99cb75..e2e041dbc 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Passthrough hosted llama-stack endpoint for LLM inference providers: diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 3168eeb9f..c5b047511 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,22 +89,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 48abf8577..896b3c91e 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,22 +84,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 6416cd00f..645b59613 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git 
a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index a1a2aa2b7..5d42b8901 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -50,9 +50,9 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), ] default_tool_groups = [ @@ -114,9 +114,9 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", + service_name="${env.OTEL_SERVICE_NAME:+}", + sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), ) ], diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 0e0d020b2..03b7a59fb 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: postgres-demo apis: - agents @@ -13,17 +13,17 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -35,35 +35,35 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} + 
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -73,19 +73,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 16fe5d4fd..0298b01c7 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) vLLM server for running LLM inference providers: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 64f71087a..b297f1489 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,16 +16,16 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -36,7 +36,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,10 +49,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -60,7 +60,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -68,14 +68,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -86,24 +86,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -114,13 +114,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 353b9902d..6bd332cc9 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,9 +16,9 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: 
${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -29,7 +29,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,10 +42,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -53,7 +53,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -61,14 +61,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -79,24 +79,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -107,13 +107,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db 
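Alongside the operator conversion, every `build.yaml` and `run.yaml` touched by this series also drops the quotes around the `version` field, so YAML parses it as an integer rather than a string:

```yaml
# before
version: '2'
# after
version: 2
```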
inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index 14b1c8974..ba70f88c6 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use SambaNova for running LLM inference and safety providers: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 58d0d36e3..b96621b58 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: sambanova apis: - agents @@ -24,19 +24,19 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: sambanova provider_type: remote::sambanova @@ -50,27 +50,27 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -81,13 +81,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} model_id: sambanova/Meta-Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 38df6a4be..428577697 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -73,17 +73,17 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..3b48dcf7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 30df39e5d..f7c53170b 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: starter apis: - agents @@ -16,47 +16,47 @@ providers: - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY:} + api_key: ${env.OPENAI_API_KEY:+} - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} + api_key: ${env.FIREWORKS_API_KEY:+} - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY:} + api_key: ${env.ANTHROPIC_API_KEY:+} - provider_id: gemini provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY:} + api_key: ${env.GEMINI_API_KEY:+} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} + api_key: ${env.GROQ_API_KEY:+} - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} + api_key: ${env.SAMBANOVA_API_KEY:+} - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: 
${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -67,31 +67,31 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -104,17 +104,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -122,7 +122,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -130,14 +130,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs 
provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -148,17 +148,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -168,10 +168,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -538,15 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__} + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: - embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384} - model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} provider_id: ollama - provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__} + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -802,9 +802,9 @@ models: provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__} + model_id: vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} provider_id: vllm - provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__} + provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 8e111e80a..df31fed84 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -69,67 +69,67 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ( "openai", OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), + OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( "fireworks", FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( "together", TOGETHER_MODEL_ENTRIES, - TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + 
TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( "ollama", [ ProviderModelEntry( - provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ProviderModelEntry( - provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}", + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", model_type=ModelType.embedding, metadata={ - "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}", + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", }, ), ], OllamaImplConfig.sample_run_config( - url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False + url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False ), ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, - AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"), + AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( "gemini", GEMINI_MODEL_ENTRIES, - GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"), + GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( "groq", GROQ_MODEL_ENTRIES, - GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), + GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( "sambanova", SAMBANOVA_MODEL_ENTRIES, - SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( "vllm", [ ProviderModelEntry( - provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}", + provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", model_type=ModelType.llm, ), ], VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:http://localhost:8000/v1}", + url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), ] @@ -180,17 +180,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 361b0b680..3ac3968e8 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) TGI server for running LLM inference providers: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 22b7bcde6..63da62a03 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index dd012323c..430494121 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,17 +83,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -103,10 +103,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 5ffeac873..518a843da 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Together.AI for running LLM inference providers: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index a24843416..7ae2a1d1a 100644 --- 
a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -110,13 +110,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index c71f960bd..dc09aeac9 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search 
config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,13 +105,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index d5ff0f1f4..147dca50d 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use a built-in vLLM engine for running LLM inference providers: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6878c22b2..104b3a239 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: vllm-gpu apis: - agents @@ -15,12 +15,12 @@ providers: - provider_id: vllm provider_type: inline::vllm config: - tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1} - max_tokens: ${env.MAX_TOKENS:4096} - max_model_len: ${env.MAX_MODEL_LEN:4096} - max_num_seqs: ${env.MAX_NUM_SEQS:4} - enforce_eager: ${env.ENFORCE_EAGER:False} - gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3} + tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} + max_tokens: ${env.MAX_TOKENS:=4096} + max_model_len: ${env.MAX_MODEL_LEN:=4096} + max_num_seqs: ${env.MAX_NUM_SEQS:=4} + enforce_eager: ${env.ENFORCE_EAGER:=False} + gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,7 +31,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -44,17 +44,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,7 +62,7 @@ providers: kvstore: 
type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -70,14 +70,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -88,17 +88,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml index e68ace183..08ee2c5ce 100644 --- a/llama_stack/templates/watsonx/build.yaml +++ b/llama_stack/templates/watsonx/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use watsonx for running LLM inference providers: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d60a87906..678bf72ff 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: watsonx apis: - agents @@ -15,9 +15,9 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:} - project_id: ${env.WATSONX_PROJECT_ID:} + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:+} + project_id: ${env.WATSONX_PROJECT_ID:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,17 +85,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,10 +105,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db models: - metadata: {} model_id: meta-llama/llama-3-3-70b-instruct diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 158f6800f..60cff7503 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,71 +1,101 @@ -version: '2' +version: 2 image_name: ollama apis: +- agents +- datasetio +- eval - inference +- safety +- scoring - telemetry - tool_runtime -- datasetio - vector_io + providers: inference: - - provider_id: custom_ollama - provider_type: remote::custom_ollama + - provider_id: ollama + provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} 
vector_io: - provider_id: faiss provider_type: inline::faiss config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + agents_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200b}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + metadata_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db datasetio: - provider_id: huggingface provider_type: remote::huggingface config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 7fcbbfde9..0fb7c395e 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -26,39 +26,44 @@ class TestReplaceEnvVars(unittest.TestCase): self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value") def test_default_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default") + 
self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default") def test_default_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value") + self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value") def test_default_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default") + + def test_empty_var_no_default(self): + self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None) def test_conditional_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional") def test_conditional_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "") + self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None) def test_conditional_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None) def test_conditional_value_with_zero(self): - self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional") def test_mixed_syntax(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ") self.assertEqual( - replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional" + replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and " + ) + self.assertEqual( + replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional" ) def test_nested_structures(self): data = { - "key1": "${env.TEST_VAR:default}", - "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"], - "key3": {"nested": "${env.NOT_SET+conditional}"}, + "key1": "${env.TEST_VAR:=default}", + "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], + "key3": {"nested": "${env.NOT_SET:+conditional}"}, } - expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}} + expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} self.assertEqual(replace_env_vars(data), expected) From dbdc811d1684cfac8056dacbc09784b0803eef40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Thu, 26 Jun 2025 10:14:27 +0200 Subject: [PATCH 8/8] chore: isolate bare minimum project dependencies (#2282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? The goal is to promote the minimal set of dependencies the project needs to run, this includes: * dependencies needed to work with the CLI * dependencies needed for the server to run with no providers This also: * Relocate redundant dependencies out of the core project and into the individual providers that actually require them. * Include all necessary server dependencies so the project can run standalone, even without any providers. ## Test Plan Build and run distro a server. 
Signed-off-by: Sébastien Han --- .github/workflows/providers-build.yml | 3 ++ llama_stack/providers/registry/inference.py | 2 +- llama_stack/providers/registry/safety.py | 2 +- llama_stack/providers/registry/scoring.py | 2 +- pyproject.toml | 19 +++---- requirements.txt | 53 +++++++++++++++++-- uv.lock | 58 ++++++++++++++++----- 7 files changed, 111 insertions(+), 28 deletions(-) diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 8268a0085..6de72cd60 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -11,6 +11,8 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' + pull_request: paths: - 'llama_stack/cli/stack/build.py' @@ -19,6 +21,7 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 66f2e8bce..47be57eee 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama", "aiohttp"], + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", ), diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index e0a04be48..f0fe1e9f5 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.safety, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=["litellm"], + pip_packages=["litellm", "requests"], module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 7980d6a13..244b06842 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.scoring, provider_type="inline::basic", - pip_packages=[], + pip_packages=["requests"], module="llama_stack.providers.inline.scoring.basic", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", api_dependencies=[ diff --git a/pyproject.toml b/pyproject.toml index 97624fade..99be1a80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", - "fire", + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.30.0,<1.0", "jinja2>=3.1.6", @@ -34,15 +34,18 @@ dependencies = [ "python-dotenv", "python-jose", "pydantic>=2", - "requests", "rich", - "setuptools", "starlette", "termcolor", "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk", # server + 
"opentelemetry-exporter-otlp-proto-http", # server + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store ] [project.optional-dependencies] @@ -67,7 +70,6 @@ dev = [ "types-requests", "types-setuptools", "pre-commit", - "uvicorn", "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. @@ -80,7 +82,6 @@ unit = [ "mcp", "chardet", "qdrant-client", - "opentelemetry-exporter-otlp-proto-http", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", "blobfile", @@ -96,8 +97,6 @@ test = [ "aiohttp", "torch>=2.6.0", "torchvision>=0.21.0", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", "chardet", "pypdf", "mcp", @@ -106,6 +105,7 @@ test = [ "transformers", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", + "requests", ] docs = [ "sphinx-autobuild", @@ -122,6 +122,7 @@ docs = [ "tomli", "linkify", "sphinxcontrib.openapi", + "requests", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] diff --git a/requirements.txt b/requirements.txt index 7e7aa38ce..2e016ef72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ aiohttp==3.11.13 # via llama-stack aiosignal==1.3.2 # via aiohttp +aiosqlite==0.21.0 + # via llama-stack annotated-types==0.7.0 # via pydantic anyio==4.8.0 @@ -14,6 +16,8 @@ anyio==4.8.0 # llama-stack-client # openai # starlette +asyncpg==0.30.0 + # via llama-stack attrs==25.1.0 # via # aiohttp @@ -27,11 +31,18 @@ certifi==2025.1.31 charset-normalizer==3.4.1 # via requests click==8.1.8 - # via llama-stack-client + # via + # llama-stack-client + # uvicorn colorama==0.4.6 ; sys_platform == 'win32' # via # click # tqdm +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distro==1.9.0 # via # llama-stack-client @@ -50,10 +61,13 @@ frozenlist==1.5.0 # aiosignal fsspec==2024.12.0 # via huggingface-hub +googleapis-common-protos==1.67.0 + # via opentelemetry-exporter-otlp-proto-http h11==0.16.0 # via # httpcore # llama-stack + # uvicorn hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub httpcore==1.0.9 @@ -71,6 +85,8 @@ idna==3.10 # httpx # requests # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api jinja2==3.1.6 # via llama-stack jiter==0.8.2 @@ -95,6 +111,25 @@ numpy==2.2.3 # via pandas openai==1.71.0 # via llama-stack +opentelemetry-api==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.30.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.30.0 + # via llama-stack +opentelemetry-proto==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.30.0 + # via + # llama-stack + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.51b0 + # via opentelemetry-sdk packaging==24.2 # via huggingface-hub pandas==2.2.3 @@ -109,6 +144,10 @@ propcache==0.3.0 # via # aiohttp # yarl +protobuf==5.29.3 + # via + # googleapis-common-protos + # opentelemetry-proto pyaml==25.1.0 # via llama-stack-client pyasn1==0.4.8 @@ -148,7 +187,7 @@ regex==2024.11.6 requests==2.32.4 # via # huggingface-hub - # llama-stack + # opentelemetry-exporter-otlp-proto-http # tiktoken rich==13.9.4 # via @@ -160,8 +199,6 @@ rpds-py==0.22.3 # referencing rsa==4.9 # via python-jose 
-setuptools==80.8.0 - # via llama-stack six==1.17.0 # via # ecdsa @@ -189,11 +226,13 @@ tqdm==4.67.1 # openai typing-extensions==4.12.2 # via + # aiosqlite # anyio # fastapi # huggingface-hub # llama-stack-client # openai + # opentelemetry-sdk # pydantic # pydantic-core # referencing @@ -201,7 +240,13 @@ tzdata==2025.1 # via pandas urllib3==2.3.0 # via requests +uvicorn==0.34.0 + # via llama-stack wcwidth==0.2.13 # via prompt-toolkit +wrapt==1.17.2 + # via deprecated yarl==1.18.3 # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/uv.lock b/uv.lock index 42eece4e1..31e296642 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, ] +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload-time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload-time = "2024-10-20T00:29:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload-time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload-time = "2024-10-20T00:29:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload-time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload-time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload-time = 
"2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload-time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "attrs" version = "25.1.0" @@ -1187,6 +1219,8 @@ version = "0.2.12" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, { name = "fastapi" }, { name = "fire" }, { name = "h11" }, @@ -1196,18 +1230,19 @@ dependencies = [ { name = "jsonschema" }, { name = "llama-stack-client" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn" }, ] [package.optional-dependencies] @@ -1238,11 +1273,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1264,9 +1299,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -1282,7 +1316,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -1292,6 +1325,8 @@ unit = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg" }, { name = "fastapi", specifier = ">=0.115.0,<1.0" }, { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, @@ -1302,6 +1337,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.12" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.12" }, { name = "openai", specifier = ">=1.66" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1309,14 +1346,13 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart", specifier = ">=0.0.20" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "streamlit", marker = "extra == 'ui'" }, { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] provides-extras = ["ui"] @@ -1340,11 +1376,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1366,9 +1402,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy" }, { name = 
"sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "torch", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, @@ -1383,7 +1418,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy" },