diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 8268a0085..6de72cd60 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -11,6 +11,8 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' + pull_request: paths: - 'llama_stack/cli/stack/build.py' @@ -19,6 +21,7 @@ on: - 'llama_stack/distribution/*.sh' - '.github/workflows/providers-build.yml' - 'llama_stack/templates/**' + - 'pyproject.toml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index affc426d6..f9e4bb38e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -7390,6 +7390,147 @@ ], "title": "AgentTurnResponseTurnStartPayload" }, + "OpenAIResponseAnnotationCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "url_citation", + "default": "url_citation" + }, + "end_index": { + "type": "integer" + }, + "start_index": { + "type": "integer" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "end_index", + "start_index", + "title", + "url" + ], + "title": "OpenAIResponseAnnotationCitation" + }, + "OpenAIResponseAnnotationContainerFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "container_file_citation", + "default": "container_file_citation" + }, + "container_id": { + "type": "string" + }, + "end_index": { + "type": "integer" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "start_index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "container_id", + "end_index", + "file_id", + "filename", + "start_index" + ], + "title": "OpenAIResponseAnnotationContainerFileCitation" + }, + "OpenAIResponseAnnotationFileCitation": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_citation", + "default": "file_citation" + }, + "file_id": { + "type": "string" + }, + "filename": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "filename", + "index" + ], + "title": "OpenAIResponseAnnotationFileCitation" + }, + "OpenAIResponseAnnotationFilePath": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "file_path", + "default": "file_path" + }, + "file_id": { + "type": "string" + }, + "index": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "type", + "file_id", + "index" + ], + "title": "OpenAIResponseAnnotationFilePath" + }, + "OpenAIResponseAnnotations": { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" + }, + { + "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation", + "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation", + "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation", + 
"file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath" + } + } + }, "OpenAIResponseInput": { "oneOf": [ { @@ -7764,6 +7905,10 @@ "type": "string", "const": "web_search" }, + { + "type": "string", + "const": "web_search_preview" + }, { "type": "string", "const": "web_search_preview_2025_03_11" @@ -7855,12 +8000,19 @@ "type": "string", "const": "output_text", "default": "output_text" + }, + "annotations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseAnnotations" + } } }, "additionalProperties": false, "required": [ "text", - "type" + "type", + "annotations" ], "title": "OpenAIResponseOutputMessageContentOutputText" }, @@ -11190,6 +11342,115 @@ ], "title": "InsertRequest" }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." 
+ }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, "InsertChunksRequest": { "type": "object", "properties": { @@ -11200,53 +11461,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." + "$ref": "#/components/schemas/Chunk" }, "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." }, @@ -14671,53 +14886,7 @@ "chunks": { "type": "array", "items": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database."
+ "$ref": "#/components/schemas/Chunk" } }, "scores": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 1e1293dc2..9175c97fc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5263,6 +5263,106 @@ components: - event_type - turn_id title: AgentTurnResponseTurnStartPayload + OpenAIResponseAnnotationCitation: + type: object + properties: + type: + type: string + const: url_citation + default: url_citation + end_index: + type: integer + start_index: + type: integer + title: + type: string + url: + type: string + additionalProperties: false + required: + - type + - end_index + - start_index + - title + - url + title: OpenAIResponseAnnotationCitation + "OpenAIResponseAnnotationContainerFileCitation": + type: object + properties: + type: + type: string + const: container_file_citation + default: container_file_citation + container_id: + type: string + end_index: + type: integer + file_id: + type: string + filename: + type: string + start_index: + type: integer + additionalProperties: false + required: + - type + - container_id + - end_index + - file_id + - filename + - start_index + title: >- + OpenAIResponseAnnotationContainerFileCitation + OpenAIResponseAnnotationFileCitation: + type: object + properties: + type: + type: string + const: file_citation + default: file_citation + file_id: + type: string + filename: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - filename + - index + title: OpenAIResponseAnnotationFileCitation + OpenAIResponseAnnotationFilePath: + type: object + properties: + type: + type: string + const: file_path + default: file_path + file_id: + type: string + index: + type: integer + additionalProperties: false + required: + - type + - file_id + - index + title: OpenAIResponseAnnotationFilePath + OpenAIResponseAnnotations: + oneOf: + - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath' + discriminator: + propertyName: type + mapping: + file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation' + url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation' + container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation' + file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath' OpenAIResponseInput: oneOf: - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' @@ -5488,6 +5588,8 @@ components: oneOf: - type: string const: web_search + - type: string + const: web_search_preview - type: string const: web_search_preview_2025_03_11 default: web_search @@ -5547,10 +5649,15 @@ components: type: string const: output_text default: output_text + annotations: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseAnnotations' additionalProperties: false required: - text - type + - annotations title: >- OpenAIResponseOutputMessageContentOutputText "OpenAIResponseOutputMessageFileSearchToolCall": @@ -7867,6 +7974,107 @@ components: - vector_db_id - chunk_size_in_tokens title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not + expected to change after. Use `Chunk.metadata` for metadata that will + be used in the context during inference. InsertChunksRequest: type: object properties: @@ -7877,40 +8085,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later.
- additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' description: >- The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` @@ -10231,40 +10406,7 @@ components: chunks: type: array items: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, - or other types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk, such as document ID, source, - or other relevant information. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. + $ref: '#/components/schemas/Chunk' scores: type: array items: diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index dd73d93ea..1b50ee712 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -18,7 +18,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,7 +38,7 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference @@ -46,7 +46,7 @@ providers: metadata_store: namespace: null type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} @@ -85,7 +85,7 @@ providers: # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: - A _provider instance_ is identified with an (id, type, configuration) triplet. @@ -94,6 +94,95 @@ A few things to note: - The configuration dictionary is provider-specific. - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. 
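+For example (an illustrative invocation; the URL is a placeholder for wherever your Ollama server runs):
+
+```bash
+llama stack run --env OLLAMA_URL=http://my-server:11434
+```
+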
+### Environment Variable Substitution + +Llama Stack supports environment variable substitution in configuration values using the +`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide +different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) +and follows similar patterns. + +#### Basic Syntax + +The basic syntax for environment variable substitution is: + +```yaml +config: + api_key: ${env.API_KEY} + url: ${env.SERVICE_URL} +``` + +If the environment variable is not set, the server will raise an error during startup. + +#### Default Values + +You can provide default values using the `:=` operator: + +```yaml +config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + port: ${env.PORT:=8321} + timeout: ${env.TIMEOUT:=60} +``` + +If the environment variable is not set, the corresponding default value will be used (here, `http://localhost:11434` for `OLLAMA_URL`). +Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set. + +#### Conditional Values + +You can use the `:+` operator to provide a value only when the environment variable is set: + +```yaml +config: + # Only include this field if ENVIRONMENT is set + environment: ${env.ENVIRONMENT:+production} +``` + +If the environment variable is set, the value after `:+` will be used. If it's not set, the field +will be omitted with a `None` value. +The empty form `${env.ENVIRONMENT:+}` is also supported: the field is omitted when the environment +variable is not set. This is useful for making a field optional and enabling it at runtime only when desired. + +#### Examples + +Here are some common patterns: + +```yaml +# Required environment variable (will error if not set) +api_key: ${env.OPENAI_API_KEY} + +# Optional with default +base_url: ${env.API_BASE_URL:=https://api.openai.com/v1} + +# Conditional field +debug_mode: ${env.DEBUG:+true} + +# Optional field that becomes None if not set +optional_token: ${env.OPTIONAL_TOKEN:+} +``` + +#### Runtime Override + +You can override environment variables at runtime when starting the server: + +```bash +# Override specific environment variables +llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com + +# Or set them in your shell +export API_KEY=sk-123 +export BASE_URL=https://custom-api.com +llama stack run --config run.yaml +``` + +#### Type Safety + +The environment variable substitution system is type-safe: + +- String values remain strings +- Empty conditional values (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None` +- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer) +- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean) + ## Resources Finally, let's look at the `models` section: ```yaml models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama provider_model_id: null ``` A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
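+As a concrete (illustrative) example of such an alias, reusing the names from the paragraph above:
+
+```yaml
+models:
+- metadata: {}
+  model_id: image_captioning_model
+  provider_id: ollama
+  provider_model_id: llama3.2:vision-11b
+```
+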
+If you need to conditionally register a model in the configuration, for example only when specific environment variables are set, you can use the special `__disabled__` string as the default value of an environment variable substitution, as shown below: + +```yaml +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL:=__disabled__} + provider_id: ollama + provider_model_id: ${env.INFERENCE_MODEL:=__disabled__} +``` + +The snippet above will only register this model if the environment variable `INFERENCE_MODEL` is set and non-empty. If the environment variable is not set, the model will not be registered at all. + ## Server Configuration The `server` section configures the HTTP server that serves the Llama Stack APIs: @@ -140,7 +241,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc:8443/openid/v1/jwks" - token: "${env.TOKEN:}" + token: "${env.TOKEN:+}" key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" @@ -384,12 +485,12 @@ providers: - provider_id: vllm-0 provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000} + url: ${env.VLLM_URL:=http://localhost:8000} # this vLLM server serves the llama-guard model (e.g., llama-guard:3b) - provider_id: vllm-1 provider_type: remote::vllm config: - url: ${env.SAFETY_VLLM_URL:http://localhost:8001} + url: ${env.SAFETY_VLLM_URL:=http://localhost:8001} ... models: - metadata: {} diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index fa7bacd8f..0a08bca03 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -15,10 +15,10 @@ data: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: @@ -30,10 +30,10 @@ data: provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,34 +45,34 @@ data: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:llamastack} + user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:}
+ service_name: ${env.OTEL_SERVICE_NAME:+} - sinks: ${env.TELEMETRY_SINKS:console} + sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -82,19 +82,19 @@ data: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -106,11 +106,11 @@ data: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: - - shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index 8e2773dd1..5ac08134c 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -12,25 +12,25 @@ providers: - provider_id: vllm-inference provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: - url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,34 +42,34 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432}
- db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -79,19 +79,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: llamastack_kvstore inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: embedding_dimension: 384 @@ -103,11 +103,11 @@ models: provider_id: vllm-inference model_type: llm - metadata: {} - model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} provider_id: vllm-safety model_type: llm shields: -- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B} +- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py index ab203b6cd..6416b283b 100644 --- a/llama_stack/apis/agents/__init__.py +++ b/llama_stack/apis/agents/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from .agents import * # noqa: F401 F403 +from .agents import * diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index addb72f14..27b85e2d6 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[ register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") +@json_schema_type +class OpenAIResponseAnnotationFileCitation(BaseModel): + type: Literal["file_citation"] = "file_citation" + file_id: str + filename: str + index: int + + +@json_schema_type +class OpenAIResponseAnnotationCitation(BaseModel): + type: Literal["url_citation"] = "url_citation" + end_index: int + start_index: int + title: str + url: str + + +@json_schema_type +class OpenAIResponseAnnotationContainerFileCitation(BaseModel): + type: Literal["container_file_citation"] = "container_file_citation" + container_id: str + end_index: int + file_id: str + filename: str + start_index: int + + +@json_schema_type +class OpenAIResponseAnnotationFilePath(BaseModel): + type: Literal["file_path"] = "file_path" + file_id: str + index: int + + +OpenAIResponseAnnotations = Annotated[ + OpenAIResponseAnnotationFileCitation + | OpenAIResponseAnnotationCitation + | OpenAIResponseAnnotationContainerFileCitation + | OpenAIResponseAnnotationFilePath, + Field(discriminator="type"), +] +register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations") + + @json_schema_type class OpenAIResponseOutputMessageContentOutputText(BaseModel): text: str type: Literal["output_text"] = "output_text" + annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list) OpenAIResponseOutputMessageContent = Annotated[ @@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[ register_schema(OpenAIResponseInput, name="OpenAIResponseInput") +# Must match type Literals of OpenAIResponseInputToolWebSearch below +WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"] + + @json_schema_type class OpenAIResponseInputToolWebSearch(BaseModel): - type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search" + # Must match values of WebSearchToolTypes above + type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = ( + "web_search" + ) # TODO: actually use search_context_size somewhere... search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$") # TODO: add user_location diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py index 3249475ee..b9b2944b2 100644 --- a/llama_stack/apis/batch_inference/__init__.py +++ b/llama_stack/apis/batch_inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .batch_inference import * # noqa: F401 F403 +from .batch_inference import * diff --git a/llama_stack/apis/benchmarks/__init__.py b/llama_stack/apis/benchmarks/__init__.py index f8f564957..62d1b367c 100644 --- a/llama_stack/apis/benchmarks/__init__.py +++ b/llama_stack/apis/benchmarks/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .benchmarks import * # noqa: F401 F403 +from .benchmarks import * diff --git a/llama_stack/apis/datasetio/__init__.py b/llama_stack/apis/datasetio/__init__.py index 378afbba8..8c087bfa4 100644 --- a/llama_stack/apis/datasetio/__init__.py +++ b/llama_stack/apis/datasetio/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasetio import * # noqa: F401 F403 +from .datasetio import * diff --git a/llama_stack/apis/datasets/__init__.py b/llama_stack/apis/datasets/__init__.py index 102b9927f..9c9a128d2 100644 --- a/llama_stack/apis/datasets/__init__.py +++ b/llama_stack/apis/datasets/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .datasets import * # noqa: F401 F403 +from .datasets import * diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index e3de3d5cb..8bf7a48d0 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import Enum, StrEnum from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field @@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType from llama_stack.schema_utils import json_schema_type, register_schema, webmethod -class DatasetPurpose(str, Enum): +class DatasetPurpose(StrEnum): """ Purpose of the dataset. Each purpose has a required input data schema. diff --git a/llama_stack/apis/eval/__init__.py b/llama_stack/apis/eval/__init__.py index 5f91ad70d..28a1d6049 100644 --- a/llama_stack/apis/eval/__init__.py +++ b/llama_stack/apis/eval/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .eval import * # noqa: F401 F403 +from .eval import * diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py index 269baf177..189e4de19 100644 --- a/llama_stack/apis/files/__init__.py +++ b/llama_stack/apis/files/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .files import * # noqa: F401 F403 +from .files import * diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index 4dfeed448..a72dcd8d4 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Annotated, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile @@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod # OpenAI Files API Models -class OpenAIFilePurpose(str, Enum): +class OpenAIFilePurpose(StrEnum): """ Valid purpose values for OpenAI Files API. 
""" diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py index f9f77f769..f0c8783c1 100644 --- a/llama_stack/apis/inference/__init__.py +++ b/llama_stack/apis/inference/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inference import * # noqa: F401 F403 +from .inference import * diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index c64a5f750..222099064 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -20,7 +20,7 @@ from typing_extensions import TypedDict from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem from llama_stack.apis.common.responses import Order from llama_stack.apis.models import Model -from llama_stack.apis.telemetry.telemetry import MetricResponseMixin +from llama_stack.apis.telemetry import MetricResponseMixin from llama_stack.models.llama.datatypes import ( BuiltinTool, StopReason, diff --git a/llama_stack/apis/inspect/__init__.py b/llama_stack/apis/inspect/__init__.py index 88ba8e908..016937e3d 100644 --- a/llama_stack/apis/inspect/__init__.py +++ b/llama_stack/apis/inspect/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .inspect import * # noqa: F401 F403 +from .inspect import * diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py index 410d8d1f9..ee90106b6 100644 --- a/llama_stack/apis/models/__init__.py +++ b/llama_stack/apis/models/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .models import * # noqa: F401 F403 +from .models import * diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 3d90a92a0..36da97e62 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field @@ -22,7 +22,7 @@ class CommonModelFields(BaseModel): @json_schema_type -class ModelType(str, Enum): +class ModelType(StrEnum): llm = "llm" embedding = "embedding" diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py index 7129c4abd..695575a30 100644 --- a/llama_stack/apis/post_training/__init__.py +++ b/llama_stack/apis/post_training/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .post_training import * # noqa: F401 F403 +from .post_training import * diff --git a/llama_stack/apis/providers/__init__.py b/llama_stack/apis/providers/__init__.py index b554a5d23..e35e2fe47 100644 --- a/llama_stack/apis/providers/__init__.py +++ b/llama_stack/apis/providers/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .providers import * # noqa: F401 F403 +from .providers import * diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py index dc3fe90b4..d93bc1355 100644 --- a/llama_stack/apis/safety/__init__.py +++ b/llama_stack/apis/safety/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .safety import * # noqa: F401 F403 +from .safety import * diff --git a/llama_stack/apis/scoring/__init__.py b/llama_stack/apis/scoring/__init__.py index 0739dfc80..624b9e704 100644 --- a/llama_stack/apis/scoring/__init__.py +++ b/llama_stack/apis/scoring/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring import * # noqa: F401 F403 +from .scoring import * diff --git a/llama_stack/apis/scoring_functions/__init__.py b/llama_stack/apis/scoring_functions/__init__.py index b96acb45f..fc1de0311 100644 --- a/llama_stack/apis/scoring_functions/__init__.py +++ b/llama_stack/apis/scoring_functions/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .scoring_functions import * # noqa: F401 F403 +from .scoring_functions import * diff --git a/llama_stack/apis/shields/__init__.py b/llama_stack/apis/shields/__init__.py index edad26100..783a4d124 100644 --- a/llama_stack/apis/shields/__init__.py +++ b/llama_stack/apis/shields/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .shields import * # noqa: F401 F403 +from .shields import * diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py index cfdec76ce..bc169e8e6 100644 --- a/llama_stack/apis/synthetic_data_generation/__init__.py +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .synthetic_data_generation import * # noqa: F401 F403 +from .synthetic_data_generation import * diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py index 6a111dc9e..1250767f7 100644 --- a/llama_stack/apis/telemetry/__init__.py +++ b/llama_stack/apis/telemetry/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .telemetry import * # noqa: F401 F403 +from .telemetry import * diff --git a/llama_stack/apis/tools/__init__.py b/llama_stack/apis/tools/__init__.py index be8846ba2..b25310ecf 100644 --- a/llama_stack/apis/tools/__init__.py +++ b/llama_stack/apis/tools/__init__.py @@ -4,5 +4,5 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .rag_tool import * # noqa: F401 F403 -from .tools import * # noqa: F401 F403 +from .rag_tool import * +from .tools import * diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_dbs/__init__.py index 158241a6d..af34ba9d4 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_dbs/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from .vector_dbs import * # noqa: F401 F403 +from .vector_dbs import * diff --git a/llama_stack/apis/vector_io/__init__.py b/llama_stack/apis/vector_io/__init__.py index 3fe4fa4b6..3f4c60805 100644 --- a/llama_stack/apis/vector_io/__init__.py +++ b/llama_stack/apis/vector_io/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_io import * # noqa: F401 F403 +from .vector_io import * diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index d6de0108c..2d4131315 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -8,6 +8,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import uuid from typing import Annotated, Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field @@ -15,21 +16,80 @@ from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.strong_typing.schema import register_schema +@json_schema_type +class ChunkMetadata(BaseModel): + """ + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that + will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` + is set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not expected to change after. + Use `Chunk.metadata` for metadata that will be used in the context during inference. + :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content. + :param document_id: The ID of the document this chunk belongs to. + :param source: The source of the content, such as a URL, file path, or other identifier. + :param created_timestamp: An optional timestamp indicating when the chunk was created. + :param updated_timestamp: An optional timestamp indicating when the chunk was last updated. + :param chunk_window: The window of the chunk, which can be used to group related chunks together. + :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken. + :param chunk_embedding_model: The embedding model used to create the chunk's embedding. + :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk. + :param content_token_count: The number of tokens in the content of the chunk. + :param metadata_token_count: The number of tokens in the metadata of the chunk. + """ + + chunk_id: str | None = None + document_id: str | None = None + source: str | None = None + created_timestamp: int | None = None + updated_timestamp: int | None = None + chunk_window: str | None = None + chunk_tokenizer: str | None = None + chunk_embedding_model: str | None = None + chunk_embedding_dimension: int | None = None + content_token_count: int | None = None + metadata_token_count: int | None = None + + +@json_schema_type class Chunk(BaseModel): """ A chunk of content that can be inserted into a vector database. :param content: The content of the chunk, which can be interleaved text, images, or other types.
:param embedding: Optional embedding for the chunk. If not provided, it will be computed later. - :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information. + :param metadata: Metadata associated with the chunk that will be used in the model context during inference. + :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality. + :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. """ content: InterleavedContent metadata: dict[str, Any] = Field(default_factory=dict) embedding: list[float] | None = None + # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id" + stored_chunk_id: str | None = Field(default=None, alias="chunk_id") + chunk_metadata: ChunkMetadata | None = None + + model_config = {"populate_by_name": True} + + def model_post_init(self, __context): + # Extract chunk_id from metadata if present + if self.metadata and "chunk_id" in self.metadata: + self.stored_chunk_id = self.metadata.pop("chunk_id") + + @property + def chunk_id(self) -> str: + """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set.""" + if self.stored_chunk_id: + return self.stored_chunk_id + + if "document_id" in self.metadata: + return generate_chunk_id(self.metadata["document_id"], str(self.content)) + + return generate_chunk_id(str(uuid.uuid4()), str(self.content)) @json_schema_type diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/distribution/access_control/datatypes.py index bc5ed6645..c833ed51b 100644 --- a/llama_stack/distribution/access_control/datatypes.py +++ b/llama_stack/distribution/access_control/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Self from pydantic import BaseModel, model_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator from .conditions import parse_conditions -class Action(str, Enum): +class Action(StrEnum): CREATE = "create" READ = "read" UPDATE = "update" diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index abc3f0065..e07da001e 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-from enum import Enum +from enum import StrEnum from pathlib import Path from typing import Annotated, Any @@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig -LLAMA_STACK_BUILD_CONFIG_VERSION = "2" -LLAMA_STACK_RUN_CONFIG_VERSION = "2" +LLAMA_STACK_BUILD_CONFIG_VERSION = 2 +LLAMA_STACK_RUN_CONFIG_VERSION = 2 RoutingKey = str | list[str] @@ -159,7 +159,7 @@ class LoggingConfig(BaseModel): ) -class AuthProviderType(str, Enum): +class AuthProviderType(StrEnum): """Supported authentication provider types.""" OAUTH2_TOKEN = "oauth2_token" @@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception): pass -class QuotaPeriod(str, Enum): +class QuotaPeriod(StrEnum): DAY = "day" @@ -229,7 +229,7 @@ class ServerConfig(BaseModel): class StackRunConfig(BaseModel): - version: str = LLAMA_STACK_RUN_CONFIG_VERSION + version: int = LLAMA_STACK_RUN_CONFIG_VERSION image_name: str = Field( ..., @@ -300,7 +300,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): - version: str = LLAMA_STACK_BUILD_CONFIG_VERSION + version: int = LLAMA_STACK_BUILD_CONFIG_VERSION distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/distribution/routers/inference.py index 50c429315..b39da7810 100644 --- a/llama_stack/distribution/routers/inference.py +++ b/llama_stack/distribution/routers/inference.py @@ -30,7 +30,13 @@ from llama_stack.apis.inference import ( ListOpenAIChatCompletionResponse, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAICompletionWithInputMessages, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, Order, ResponseFormat, SamplingParams, @@ -41,14 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py index 6af3bd416..4bd5952dc 100644 --- a/llama_stack/distribution/routers/vector_io.py +++ b/llama_stack/distribution/routers/vector_io.py @@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import ( QueryChunksResponse, SearchRankingOptions, VectorIO, - VectorStoreDeleteResponse, - VectorStoreListResponse, - VectorStoreObject, - VectorStoreSearchResponsePage, -) -from llama_stack.apis.vector_io.vector_io import ( VectorStoreChunkingStrategy, + VectorStoreDeleteResponse, VectorStoreFileContentsResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, + VectorStoreListResponse, + VectorStoreObject, + VectorStoreSearchResponsePage, ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index 5a9708497..c86880669 100644 --- a/llama_stack/distribution/stack.py +++ 
b/llama_stack/distribution/stack.py
@@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
         method = getattr(impls[api], register_method)
         for obj in objects:
+            # In complex templates, like our starter template, we may have dynamic model ids
+            # given by environment variables. This allows those environment variables to have
+            # a default value of __disabled__ to skip registration of the model if not set.
+            if (
+                hasattr(obj, "provider_model_id")
+                and obj.provider_model_id is not None
+                and "__disabled__" in obj.provider_model_id
+            ):
+                continue
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
             # we use model_dump() to find all the attrs and then getattr to get the still typed value.
@@ -118,7 +127,12 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
+        super().__init__(
+            f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}. "
+            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
+            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
+            f"or ensure the environment variable is set."
+        )


 def replace_env_vars(config: Any, path: str = "") -> Any:
@@ -141,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return result

     elif isinstance(config, str):
-        # Updated pattern to support both default values (:) and conditional values (+)
-        pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}"
+        # Pattern supports bash-like syntax: := for a default value and :+ for a conditional value
+        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"

-        def get_env_var(match):
+        def get_env_var(match: re.Match):
             env_var = match.group(1)
-            operator = match.group(2)  # ':' for default, '+' for conditional
+            operator = match.group(2)  # '=' for default, '+' for conditional
             value_expr = match.group(3)

             env_value = os.environ.get(env_var)

-            if operator == ":":  # Default value syntax: ${env.FOO:default}
+            if operator == "=":  # Default value syntax: ${env.FOO:=default}
                 if not env_value:
-                    if value_expr is None:
+                    # value_expr is an empty string (not None) when the group does not match,
+                    # which means ${env.FOO:=} is an error
+                    if value_expr == "":
                         raise EnvVarError(env_var, path)
                     else:
                         value = value_expr
                 else:
                     value = env_value
-            elif operator == "+":  # Conditional value syntax: ${env.FOO+value_if_set}
+            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
                 if env_value:
                     value = value_expr
                 else:
@@ -174,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return os.path.expanduser(value)

         try:
-            return re.sub(pattern, get_env_var, config)
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None

     return config


+def _convert_string_to_proper_type(value: str) -> Any:
+    # This can be tricky depending on the declared type of the config field: with 'str | None' we
+    # are fine converting the empty string to None, but with plain 'str' we would need to keep the
+    # empty string. 'str | None' is the more common pattern, and provider configs should be typed this way.
+ # TODO: we could try to load the config class and see if the config has a field with type 'str | None' + # and then convert the empty string to None or not + if value == "": + return None + + lowered = value.lower() + if lowered == "true": + return True + elif lowered == "false": + return False + + try: + return int(value) + except ValueError: + pass + + try: + return float(value) + except ValueError: + pass + + return value + + def validate_env_pair(env_pair: str) -> tuple[str, str]: """Validate and split an environment variable key-value pair.""" try: diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py index 11455ed46..9db87b280 100644 --- a/llama_stack/distribution/ui/modules/api.py +++ b/llama_stack/distribution/ui/modules/api.py @@ -25,7 +25,7 @@ class LlamaStackApi: def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None): """Run scoring on a single row""" if not scoring_params: - scoring_params = {fn_id: None for fn_id in scoring_function_ids} + scoring_params = dict.fromkeys(scoring_function_ids) return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params) diff --git a/llama_stack/log.py b/llama_stack/log.py index c14967f0a..fcbb79a5d 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -33,7 +33,7 @@ CATEGORIES = [ ] # Initialize category levels with default level -_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES} +_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL) def config_to_category_levels(category: str, level: str): diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index f9f094c3d..7f1ebed55 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -5,7 +5,7 @@ # the root directory of this source tree. import base64 -from enum import Enum +from enum import Enum, StrEnum from io import BytesIO from typing import Annotated, Any, Literal @@ -171,7 +171,7 @@ class GenerationResult(BaseModel): ignore_token: bool -class QuantizationMode(str, Enum): +class QuantizationMode(StrEnum): none = "none" fp8_mixed = "fp8_mixed" int4_mixed = "int4_mixed" diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 60b05545b..221ed9027 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
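# ---------------------------------------------------------------------------
# Editor's note: a self-contained sketch (not part of the patch) of the new
# bash-like substitution grammar that replace_env_vars() implements above.
# ${env.FOO:=default} falls back when FOO is unset or empty, ${env.FOO:+value}
# yields the value only when FOO is set, and a bare ${env.FOO} must be set.
# The real implementation then passes the result through
# _convert_string_to_proper_type(); that step is omitted here.
import os
import re

PATTERN = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"


def substitute(template: str) -> str:
    def repl(match: re.Match) -> str:
        env_var, operator, value_expr = match.group(1, 2, 3)
        env_value = os.environ.get(env_var)
        if operator == "=":  # ${env.FOO:=default}
            if env_value:
                return env_value
            if value_expr == "":  # ${env.FOO:=} is an error, as in the patch
                raise ValueError(f"{env_var} not set and no default given")
            return value_expr
        if operator == "+":  # ${env.FOO:+value_if_set}
            return value_expr if env_value else ""
        if env_value is None:  # bare ${env.FOO} must exist
            raise ValueError(f"{env_var} not set")
        return env_value

    return re.sub(PATTERN, repl, template)


os.environ.pop("OLLAMA_URL", None)
print(substitute("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434
os.environ["TOGETHER_API_KEY"] = "tok"
print(substitute("${env.TOGETHER_API_KEY:+configured}"))  # configured
# ---------------------------------------------------------------------------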
-from enum import Enum +from enum import StrEnum from typing import Any, Protocol from urllib.parse import urlparse @@ -225,7 +225,7 @@ def remote_provider_spec( ) -class HealthStatus(str, Enum): +class HealthStatus(StrEnum): OK = "OK" ERROR = "Error" NOT_IMPLEMENTED = "Not Implemented" diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py index 4465a32fe..f291593f4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py @@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( Inference, OpenAIAssistantMessageParam, OpenAIChatCompletion, @@ -583,7 +584,7 @@ class OpenAIResponsesImpl: from llama_stack.apis.agents.openai_responses import ( MCPListToolsTool, ) - from llama_stack.apis.tools.tools import Tool + from llama_stack.apis.tools import Tool mcp_tool_to_server = {} @@ -609,7 +610,7 @@ class OpenAIResponsesImpl: # TODO: Handle other tool types if input_tool.type == "function": chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump())) - elif input_tool.type == "web_search": + elif input_tool.type in WebSearchToolTypes: tool_name = "web_search" tool = await self.tool_groups_api.get_tool(tool_name) if not tool: diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index bc0898dc5..9ae2018c4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -208,7 +208,7 @@ class MetaReferenceEvalImpl( for scoring_fn_id in scoring_functions } else: - scoring_functions_dict = {scoring_fn_id: None for scoring_fn_id in scoring_functions} + scoring_functions_dict = dict.fromkeys(scoring_functions) score_response = await self.scoring_api.score( input_rows=score_input_rows, scoring_functions=scoring_functions_dict diff --git a/llama_stack/providers/inline/files/localfs/config.py b/llama_stack/providers/inline/files/localfs/config.py index 757a70742..6c767af8f 100644 --- a/llama_stack/providers/inline/files/localfs/config.py +++ b/llama_stack/providers/inline/files/localfs/config.py @@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}", + "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}", "metadata_store": SqliteSqlStoreConfig.sample_run_config( __distro_dir__=__distro_dir__, db_name="files_metadata.db", diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py index 7bc961443..9556b026a 100644 --- a/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/llama_stack/providers/inline/inference/meta_reference/config.py @@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel): def sample_run_config( cls, model: str = "Llama3.2-3B-Instruct", - checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}", - quantization_type: str = 
"${env.QUANTIZATION_TYPE:bf16}", - model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}", - max_batch_size: str = "${env.MAX_BATCH_SIZE:1}", - max_seq_len: str = "${env.MAX_SEQ_LEN:4096}", + checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}", + quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}", + model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}", + max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}", + max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}", **kwargs, ) -> dict[str, Any]: return { diff --git a/llama_stack/providers/inline/inference/vllm/config.py b/llama_stack/providers/inline/inference/vllm/config.py index ce8743c74..660ef206b 100644 --- a/llama_stack/providers/inline/inference/vllm/config.py +++ b/llama_stack/providers/inline/inference/vllm/config.py @@ -44,10 +44,10 @@ class VLLMConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}", - "max_tokens": "${env.MAX_TOKENS:4096}", - "max_model_len": "${env.MAX_MODEL_LEN:4096}", - "max_num_seqs": "${env.MAX_NUM_SEQS:4}", - "enforce_eager": "${env.ENFORCE_EAGER:False}", - "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}", + "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}", + "max_tokens": "${env.MAX_TOKENS:=4096}", + "max_model_len": "${env.MAX_MODEL_LEN:=4096}", + "max_num_seqs": "${env.MAX_NUM_SEQS:=4}", + "enforce_eager": "${env.ENFORCE_EAGER:=False}", + "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}", } diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py index 4a80f1e4f..f44d27f96 100644 --- a/llama_stack/providers/inline/scoring/braintrust/config.py +++ b/llama_stack/providers/inline/scoring/braintrust/config.py @@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "openai_api_key": "${env.OPENAI_API_KEY:}", + "openai_api_key": "${env.OPENAI_API_KEY:+}", } diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index b705cb9b3..2bd113a94 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -7,7 +7,7 @@ from typing import Any from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import ( ScoreBatchResponse, ScoreResponse, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py index 51cdf6c3f..340215a53 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py @@ -6,7 +6,7 @@ import re from typing import Any -from llama_stack.apis.inference.inference import Inference, UserMessage +from llama_stack.apis.inference import Inference, UserMessage from llama_stack.apis.scoring import ScoringResultRow from llama_stack.apis.scoring_functions import ScoringFnParams from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn diff --git 
a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 93509040c..50dd8a788 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, Field, field_validator @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR -class TelemetrySink(str, Enum): +class TelemetrySink(StrEnum): OTEL_TRACE = "otel_trace" OTEL_METRIC = "otel_metric" SQLITE = "sqlite" @@ -20,12 +20,12 @@ class TelemetrySink(str, Enum): class TelemetryConfig(BaseModel): - otel_trace_endpoint: str = Field( - default="http://localhost:4318/v1/traces", + otel_trace_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for traces", ) - otel_metric_endpoint: str = Field( - default="http://localhost:4318/v1/metrics", + otel_metric_endpoint: str | None = Field( + default=None, description="The OpenTelemetry collector endpoint URL for metrics", ) service_name: str = Field( @@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]: return { - "service_name": "${env.OTEL_SERVICE_NAME:\u200b}", - "sinks": "${env.TELEMETRY_SINKS:console,sqlite}", - "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", + "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}", + "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index 0f6cf8619..98f5bf5a1 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry): trace.set_tracer_provider(provider) _TRACER_PROVIDER = provider if TelemetrySink.OTEL_TRACE in self.config.sinks: + if self.config.otel_trace_endpoint is None: + raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled") span_exporter = OTLPSpanExporter( endpoint=self.config.otel_trace_endpoint, ) span_processor = BatchSpanProcessor(span_exporter) trace.get_tracer_provider().add_span_processor(span_processor) if TelemetrySink.OTEL_METRIC in self.config.sinks: + if self.config.otel_metric_endpoint is None: + raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled") metric_reader = PeriodicExportingMetricReader( OTLPMetricExporter( endpoint=self.config.otel_metric_endpoint, diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index 7f4fe5dbd..6a7c7885c 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti chunks = [] for doc in documents: content = await content_from_doc(doc) + # TODO: we should add 
enrichment here as URLs won't be added to the metadata by default chunks.extend( make_overlapped_chunks( doc.document_id, @@ -157,8 +158,24 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti ) break - metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]} - text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset) + # Add useful keys from chunk_metadata to metadata and remove some from metadata + chunk_metadata_keys_to_include_from_context = [ + "chunk_id", + "document_id", + "source", + ] + metadata_keys_to_exclude_from_context = [ + "token_count", + "metadata_token_count", + ] + metadata_for_context = {} + for k in chunk_metadata_keys_to_include_from_context: + metadata_for_context[k] = getattr(chunk.chunk_metadata, k) + for k in metadata: + if k not in metadata_keys_to_exclude_from_context: + metadata_for_context[k] = metadata[k] + + text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context) picked.append(TextContentItem(text=text_content)) picked.append(TextContentItem(text="END of knowledge_search tool results.\n")) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 12f4d6ad0..355750b25 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -16,8 +16,7 @@ import numpy as np from numpy.typing import NDArray from llama_stack.apis.files import Files -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py index 283724b41..7cc91d918 100644 --- a/llama_stack/providers/inline/vector_io/qdrant/config.py +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py index cb806cb39..4c57f4aba 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/config.py @@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db", } diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index d832e56f5..7e977635a 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -5,20 +5,18 @@ # the root directory of this source tree. 
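# ---------------------------------------------------------------------------
# Editor's note: a runnable sketch (not part of the patch) of the context-
# metadata merge the RAG tool performs above: a whitelist of ChunkMetadata
# fields (chunk_id, document_id, source) is surfaced to the model, while the
# token-accounting keys stay out of the prompt. `SimpleNamespace` stands in
# for the real ChunkMetadata object.
from types import SimpleNamespace
from typing import Any


def build_context_metadata(chunk_metadata: Any, metadata: dict[str, Any]) -> dict[str, Any]:
    include_from_chunk_metadata = ["chunk_id", "document_id", "source"]
    exclude_from_context = ["token_count", "metadata_token_count"]
    out = {k: getattr(chunk_metadata, k) for k in include_from_chunk_metadata}
    out.update({k: v for k, v in metadata.items() if k not in exclude_from_context})
    return out


cm = SimpleNamespace(chunk_id="c-1", document_id="doc-9", source="https://example.com/a")
meta = {"token_count": 512, "metadata_token_count": 7, "author": "jdoe"}
print(build_context_metadata(cm, meta))
# {'chunk_id': 'c-1', 'document_id': 'doc-9', 'source': 'https://example.com/a', 'author': 'jdoe'}
# ---------------------------------------------------------------------------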
import asyncio -import hashlib import json import logging import sqlite3 import struct -import uuid from typing import Any import numpy as np import sqlite_vec from numpy.typing import NDArray -from llama_stack.apis.files.files import Files -from llama_stack.apis.inference.inference import Inference +from llama_stack.apis.files import Files +from llama_stack.apis.inference import Inference from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, @@ -66,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: score_range = max_score - min_score if score_range > 0: return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} - return {doc_id: 1.0 for doc_id in scores} + return dict.fromkeys(scores, 1.0) def _weighted_rerank( @@ -201,10 +199,7 @@ class SQLiteVecIndex(EmbeddingIndex): batch_embeddings = embeddings[i : i + batch_size] # Insert metadata - metadata_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json()) - for chunk in batch_chunks - ] + metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks] cur.executemany( f""" INSERT INTO {self.metadata_table} (id, chunk) @@ -218,7 +213,7 @@ class SQLiteVecIndex(EmbeddingIndex): embedding_data = [ ( ( - generate_chunk_id(chunk.metadata["document_id"], chunk.content), + chunk.chunk_id, serialize_vector(emb.tolist()), ) ) @@ -230,10 +225,7 @@ class SQLiteVecIndex(EmbeddingIndex): ) # Insert FTS content - fts_data = [ - (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content) - for chunk in batch_chunks - ] + fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks] # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT) cur.executemany( f"DELETE FROM {self.fts_table} WHERE id = ?;", @@ -381,13 +373,12 @@ class SQLiteVecIndex(EmbeddingIndex): vector_response = await self.query_vector(embedding, k, score_threshold) keyword_response = await self.query_keyword(query_string, k, score_threshold) - # Convert responses to score dictionaries using generate_chunk_id + # Convert responses to score dictionaries using chunk_id vector_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score - for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) } keyword_scores = { - generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score + chunk.chunk_id: score for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) } @@ -408,13 +399,7 @@ class SQLiteVecIndex(EmbeddingIndex): filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] # Create a map of chunk_id to chunk for both responses - chunk_map = {} - for c in vector_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c - for c in keyword_response.chunks: - chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content)) - chunk_map[chunk_id] = c + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} # Use the map to look up chunks by their IDs chunks = [] @@ -757,9 +742,3 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc if vector_db_id not in self.cache: raise ValueError(f"Vector DB {vector_db_id} not 
found") return await self.cache[vector_db_id].query_chunks(query, params) - - -def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" - hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 66f2e8bce..47be57eee 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama", "aiohttp"], + pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", ), diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index e0a04be48..f0fe1e9f5 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]: api=Api.safety, adapter=AdapterSpec( adapter_type="sambanova", - pip_packages=["litellm"], + pip_packages=["litellm", "requests"], module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 7980d6a13..244b06842 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]: InlineProviderSpec( api=Api.scoring, provider_type="inline::basic", - pip_packages=[], + pip_packages=["requests"], module="llama_stack.providers.inline.scoring.basic", config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig", api_dependencies=[ diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/llama_stack/providers/remote/datasetio/nvidia/config.py index e616ce25c..0f5ea22e9 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/config.py +++ b/llama_stack/providers/remote/datasetio/nvidia/config.py @@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py index 1f22a935b..f723c92cc 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py +++ b/llama_stack/providers/remote/datasetio/nvidia/datasetio.py @@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter: Returns: Dataset """ - ## add warnings for unsupported params + # add warnings for unsupported params request_body = { "name": dataset_def.identifier, 
"namespace": self.config.dataset_namespace, diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/llama_stack/providers/remote/eval/nvidia/config.py index 5c8f9ff76..7a1c04304 100644 --- a/llama_stack/providers/remote/eval/nvidia/config.py +++ b/llama_stack/providers/remote/eval/nvidia/config.py @@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}", + "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}", } diff --git a/llama_stack/providers/remote/inference/anthropic/models.py b/llama_stack/providers/remote/inference/anthropic/models.py index 39cb64440..afaf3c4e4 100644 --- a/llama_stack/providers/remote/inference/anthropic/models.py +++ b/llama_stack/providers/remote/inference/anthropic/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index 79b1b5f08..1c82ff3a8 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -24,6 +24,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,14 +39,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index 027eeab8d..392aed72f 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/gemini/models.py b/llama_stack/providers/remote/inference/gemini/models.py index ef1cf339f..c4bb4f08b 100644 --- a/llama_stack/providers/remote/inference/gemini/models.py +++ b/llama_stack/providers/remote/inference/gemini/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 27d7d7961..4b295e788 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -9,7 +9,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChoiceDelta, diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 4c449edc2..6369928bb 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}", + "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", } diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cb6c6e279..1dd72da3f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -29,20 +29,18 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, ToolChoice, ToolConfig, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/llama_stack/providers/remote/inference/ollama/__init__.py index 073c31cde..491339451 100644 --- a/llama_stack/providers/remote/inference/ollama/__init__.py +++ b/llama_stack/providers/remote/inference/ollama/__init__.py @@ -10,6 +10,6 @@ from .config import OllamaImplConfig async def get_adapter_impl(config: OllamaImplConfig, _deps): from .ollama import OllamaInferenceAdapter - impl = OllamaInferenceAdapter(config.url) + impl = OllamaInferenceAdapter(config) await impl.initialize() return impl diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 0e4aef0e1..b2cc4d8a7 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL + raise_on_connect_error: bool = True @classmethod - def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) 
-> dict[str, Any]: - return {"url": url} + def sample_run_config( + cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs + ) -> dict[str, Any]: + return { + "url": url, + "raise_on_connect_error": raise_on_connect_error, + } diff --git a/llama_stack/providers/remote/inference/ollama/models.py b/llama_stack/providers/remote/inference/ollama/models.py index 8f0f0421a..cacf88861 100644 --- a/llama_stack/providers/remote/inference/ollama/models.py +++ b/llama_stack/providers/remote/inference/ollama/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index d51072fbf..e9df0dcc8 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -9,7 +9,6 @@ import uuid from collections.abc import AsyncGenerator, AsyncIterator from typing import Any -import httpx from ollama import AsyncClient # type: ignore[attr-defined] from openai import AsyncOpenAI @@ -33,6 +32,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -41,15 +47,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.datatypes import ( @@ -57,6 +54,7 @@ from llama_stack.providers.datatypes import ( HealthStatus, ModelsProtocolPrivate, ) +from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, ) @@ -90,9 +88,10 @@ class OllamaInferenceAdapter( InferenceProvider, ModelsProtocolPrivate, ): - def __init__(self, url: str) -> None: + def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = ModelRegistryHelper(MODEL_ENTRIES) - self.url = url + self.url = config.url + self.raise_on_connect_error = config.raise_on_connect_error @property def client(self) -> AsyncClient: @@ -103,8 +102,13 @@ class OllamaInferenceAdapter( return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama") async def initialize(self) -> None: - logger.info(f"checking connectivity to Ollama at `{self.url}`...") - await self.health() + logger.debug(f"checking connectivity to Ollama at `{self.url}`...") + health_response = await self.health() + if health_response["status"] == HealthStatus.ERROR: + if self.raise_on_connect_error: + raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") + else: + logger.warning("Ollama Server is not running, start it using `ollama serve` in a 
separate terminal") async def health(self) -> HealthResponse: """ @@ -117,10 +121,8 @@ class OllamaInferenceAdapter( try: await self.client.ps() return HealthResponse(status=HealthStatus.OK) - except httpx.ConnectError as e: - raise RuntimeError( - "Ollama Server is not running, start it using `ollama serve` in a separate terminal" - ) from e + except Exception as e: + return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/remote/inference/openai/models.py b/llama_stack/providers/remote/inference/openai/models.py index e029c456c..14a6955d5 100644 --- a/llama_stack/providers/remote/inference/openai/models.py +++ b/llama_stack/providers/remote/inference/openai/models.py @@ -6,7 +6,7 @@ from dataclasses import dataclass -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, ) diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index ed4ec22aa..72428422f 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -10,7 +10,7 @@ from typing import Any from openai import AsyncOpenAI -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAICompletion, diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index e9660abb9..d5b3a5973 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -19,7 +19,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -28,13 +33,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/llama_stack/providers/remote/inference/runpod/config.py index e3913dc35..ff32a971c 100644 --- a/llama_stack/providers/remote/inference/runpod/config.py +++ b/llama_stack/providers/remote/inference/runpod/config.py @@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: return { - "url": "${env.RUNPOD_URL:}", - "api_token": "${env.RUNPOD_API_TOKEN:}", + "url": "${env.RUNPOD_URL:+}", + "api_token": "${env.RUNPOD_API_TOKEN:+}", } diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py index f8c98893e..1863b8a50 100644 --- a/llama_stack/providers/remote/inference/runpod/runpod.py +++ b/llama_stack/providers/remote/inference/runpod/runpod.py @@ -8,7 +8,7 @@ from 
collections.abc import AsyncGenerator from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import OpenAIEmbeddingsResponse # from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/together/config.py b/llama_stack/providers/remote/inference/together/config.py index 5c7f60519..121e2cae7 100644 --- a/llama_stack/providers/remote/inference/together/config.py +++ b/llama_stack/providers/remote/inference/together/config.py @@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel): def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { "url": "https://api.together.xyz/v1", - "api_key": "${env.TOGETHER_API_KEY:}", + "api_key": "${env.TOGETHER_API_KEY:+}", } diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py index f4b259767..85e1b1848 100644 --- a/llama_stack/providers/remote/inference/together/models.py +++ b/llama_stack/providers/remote/inference/together/models.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.utils.inference.model_registry import ( ProviderModelEntry, diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 7030a644d..9e6877b7c 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -23,7 +23,12 @@ from llama_stack.apis.inference import ( Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, ResponseFormatType, SamplingParams, @@ -33,13 +38,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index 99abddf51..e11efa7f0 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel): @classmethod def validate_tls_verify(cls, v): if isinstance(v, str): - # Check if it's a boolean string - if v.lower() in ("true", "false"): - return v.lower() == "true" # Otherwise, treat it as a cert path cert_path = Path(v).expanduser().resolve() if not cert_path.exists(): @@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel): ): return { "url": url, - "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", - "api_token": "${env.VLLM_API_TOKEN:fake}", - "tls_verify": "${env.VLLM_TLS_VERIFY:true}", + 
"max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", + "api_token": "${env.VLLM_API_TOKEN:=fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:=true}", } diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 3424be6b4..d1455acaa 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator from typing import Any import httpx -from openai import AsyncOpenAI +from openai import APIConnectionError, AsyncOpenAI from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -38,9 +38,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAICompletion, OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -49,12 +53,6 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) from llama_stack.apis.models import Model, ModelType from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from llama_stack.models.llama.sku_list import all_registered_models @@ -461,7 +459,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): model = await self.register_helper.register_model(model) except ValueError: pass # Ignore statically unknown model, will check live listing - res = await client.models.list() + try: + res = await client.models.list() + except APIConnectionError as e: + raise ValueError( + f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL." 
+ ) from e available_models = [m.id async for m in res] if model.provider_resource_id not in available_models: raise ValueError( diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/llama_stack/providers/remote/inference/watsonx/config.py index 5eda9c5c0..9534eceeb 100644 --- a/llama_stack/providers/remote/inference/watsonx/config.py +++ b/llama_stack/providers/remote/inference/watsonx/config.py @@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}", - "api_key": "${env.WATSONX_API_KEY:}", - "project_id": "${env.WATSONX_PROJECT_ID:}", + "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", + "api_key": "${env.WATSONX_API_KEY:+}", + "project_id": "${env.WATSONX_PROJECT_ID:+}", } diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/llama_stack/providers/remote/inference/watsonx/watsonx.py index 7cdd06a1f..78161d1cb 100644 --- a/llama_stack/providers/remote/inference/watsonx/watsonx.py +++ b/llama_stack/providers/remote/inference/watsonx/watsonx.py @@ -18,10 +18,16 @@ from llama_stack.apis.inference import ( CompletionRequest, EmbeddingsResponse, EmbeddingTaskType, + GreedySamplingStrategy, Inference, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAICompletion, OpenAIEmbeddingsResponse, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -29,14 +35,6 @@ from llama_stack.apis.inference import ( ToolConfig, ToolDefinition, ToolPromptFormat, -) -from llama_stack.apis.inference.inference import ( - GreedySamplingStrategy, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIMessageParam, - OpenAIResponseFormatParam, TopKSamplingStrategy, TopPSamplingStrategy, ) diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/llama_stack/providers/remote/post_training/nvidia/config.py index fa08b6e3f..ea6dff0b5 100644 --- a/llama_stack/providers/remote/post_training/nvidia/config.py +++ b/llama_stack/providers/remote/post_training/nvidia/config.py @@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "api_key": "${env.NVIDIA_API_KEY:}", - "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}", - "project_id": "${env.NVIDIA_PROJECT_ID:test-project}", - "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}", + "api_key": "${env.NVIDIA_API_KEY:+}", + "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}", + "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}", + "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}", } diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py index ddf5a3a0b..1c618f4f4 100644 --- a/llama_stack/providers/remote/safety/nvidia/config.py +++ b/llama_stack/providers/remote/safety/nvidia/config.py @@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel): @classmethod def sample_run_config(cls, **kwargs) -> dict[str, Any]: return { - "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}", - "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}", + "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}", + "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}", } diff --git 
a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 37ba21304..93b97a1b2 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "api_key": "${env.BRAVE_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index c9b18d30d..5bdd27807 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, } diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py index aefc86bd6..b5b10e371 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py @@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]: return { - "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}", + "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}", } diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/llama_stack/providers/remote/vector_io/pgvector/config.py index 04b92a2e4..041e864ca 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel): @classmethod def sample_run_config( cls, - host: str = "${env.PGVECTOR_HOST:localhost}", - port: int = "${env.PGVECTOR_PORT:5432}", + host: str = "${env.PGVECTOR_HOST:=localhost}", + port: int = "${env.PGVECTOR_PORT:=5432}", db: str = "${env.PGVECTOR_DB}", user: str = "${env.PGVECTOR_USER}", password: str = "${env.PGVECTOR_PASSWORD}", diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index e9d6eec22..09ea08fa0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -70,8 +70,8 @@ class QdrantIndex(EmbeddingIndex): ) points = [] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): - chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}" + for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)): + chunk_id = chunk.chunk_id points.append( PointStruct( id=convert_id(chunk_id), diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index c21f379c9..d19908368 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -23,6 +23,13 @@ from llama_stack.apis.inference import ( JsonSchemaResponseFormat, LogProbConfig, Message, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + 
OpenAICompletion, + OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIMessageParam, + OpenAIResponseFormatParam, ResponseFormat, SamplingParams, TextTruncation, @@ -31,16 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, - OpenAIMessageParam, - OpenAIResponseFormatParam, -) -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index d707e36c2..de67e5288 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -8,7 +8,7 @@ from typing import Any from pydantic import BaseModel, Field -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index ff95b12a7..47144ee0e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -95,27 +95,25 @@ from llama_stack.apis.inference import ( CompletionResponse, CompletionResponseStreamChunk, GreedySamplingStrategy, - Message, - SamplingParams, - SystemMessage, - TokenLogProbs, - ToolChoice, - ToolResponseMessage, - TopKSamplingStrategy, - TopPSamplingStrategy, - UserMessage, -) -from llama_stack.apis.inference.inference import ( JsonSchemaResponseFormat, + Message, OpenAIChatCompletion, OpenAICompletion, OpenAICompletionChoice, OpenAIEmbeddingData, OpenAIMessageParam, OpenAIResponseFormatParam, + SamplingParams, + SystemMessage, + TokenLogProbs, + ToolChoice, ToolConfig, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChoice as OpenAIChatCompletionChoice, ) from llama_stack.models.llama.datatypes import ( @@ -1026,7 +1024,9 @@ def openai_messages_to_messages( return converted_messages -def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]): +def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None): + if content is None: + return "" if isinstance(content, str): return content elif isinstance(content, list): diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index e966e13ba..a45ff4ce8 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig): return { "type": "redis", "namespace": None, - "host": "${env.REDIS_HOST:localhost}", - "port": "${env.REDIS_PORT:6379}", + "host": "${env.REDIS_HOST:=localhost}", + "port": "${env.REDIS_PORT:=6379}", } @@ -66,7 +66,7 @@ class 
SqliteKVStoreConfig(CommonConfig): return { "type": "sqlite", "namespace": None, - "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig): return { "type": "postgres", "namespace": None, - "host": "${env.POSTGRES_HOST:localhost}", - "port": "${env.POSTGRES_PORT:5432}", - "db": "${env.POSTGRES_DB:llamastack}", - "user": "${env.POSTGRES_USER:llamastack}", - "password": "${env.POSTGRES_PASSWORD:llamastack}", - "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}", } @classmethod @@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig): return { "type": "mongodb", "namespace": None, - "host": "${env.MONGODB_HOST:localhost}", - "port": "${env.MONGODB_PORT:5432}", + "host": "${env.MONGODB_HOST:=localhost}", + "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", "user": "${env.MONGODB_USER}", "password": "${env.MONGODB_PASSWORD}", - "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}", + "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}", } diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 8b962db76..d00624aed 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -12,8 +12,7 @@ import uuid from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.files.files import OpenAIFileObject +from llama_stack.apis.files import Files, OpenAIFileObject from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index a6e420feb..ab204a75a 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -7,6 +7,7 @@ import base64 import io import logging import re +import time from abc import ABC, abstractmethod from dataclasses import dataclass from typing import Any @@ -23,12 +24,13 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id log = logging.getLogger(__name__) @@ -148,6 +150,7 @@ async def content_from_doc(doc: RAGDocument) -> str: def make_overlapped_chunks( document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any] ) -> list[Chunk]: + default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER" tokenizer = Tokenizer.get_instance() tokens = tokenizer.encode(text, bos=False, 
eos=False) try: @@ -161,16 +164,32 @@ def make_overlapped_chunks( for i in range(0, len(tokens), window_len - overlap_len): toks = tokens[i : i + window_len] chunk = tokenizer.decode(toks) + chunk_id = generate_chunk_id(chunk, text) chunk_metadata = metadata.copy() + chunk_metadata["chunk_id"] = chunk_id chunk_metadata["document_id"] = document_id chunk_metadata["token_count"] = len(toks) chunk_metadata["metadata_token_count"] = len(metadata_tokens) + backend_chunk_metadata = ChunkMetadata( + chunk_id=chunk_id, + document_id=document_id, + source=metadata.get("source", None), + created_timestamp=metadata.get("created_timestamp", int(time.time())), + updated_timestamp=int(time.time()), + chunk_window=f"{i}-{i + len(toks)}", + chunk_tokenizer=default_tokenizer, + chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + content_token_count=len(toks), + metadata_token_count=len(metadata_tokens), + ) + # chunk is a string chunks.append( Chunk( content=chunk, metadata=chunk_metadata, + chunk_metadata=backend_chunk_metadata, ) ) @@ -237,6 +256,9 @@ class VectorDBWithIndex: for i, c in enumerate(chunks): if c.embedding is None: chunks_to_embed.append(c) + if c.chunk_metadata: + c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension else: _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index edc7672a3..d558a2a26 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): return cls( type="sqlite", - db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name, + db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, ) @property @@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def sample_run_config(cls, **kwargs): return cls( type="postgres", - host="${env.POSTGRES_HOST:localhost}", - port="${env.POSTGRES_PORT:5432}", - db="${env.POSTGRES_DB:llamastack}", - user="${env.POSTGRES_USER:llamastack}", - password="${env.POSTGRES_PASSWORD:llamastack}", + host="${env.POSTGRES_HOST:=localhost}", + port="${env.POSTGRES_PORT:=5432}", + db="${env.POSTGRES_DB:=llamastack}", + user="${env.POSTGRES_USER:=llamastack}", + password="${env.POSTGRES_PASSWORD:=llamastack}", ) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 10081f037..4ae68ee1d 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -180,7 +180,7 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceCont trace_id = generate_trace_id() context = TraceContext(BACKGROUND_LOGGER, trace_id) - attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {}) + attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | (attributes or {}) context.push_span(name, attributes) CURRENT_TRACE_CONTEXT.set(context) diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/llama_stack/providers/utils/vector_io/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/utils/vector_io/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta 
Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py new file mode 100644 index 000000000..68cf11cad --- /dev/null +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import hashlib +import uuid + + +def generate_chunk_id(document_id: str, chunk_text: str) -> str: + """Generate a unique chunk ID using a hash of document ID and chunk text.""" + hash_input = f"{document_id}:{chunk_text}".encode() + return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 97a06f77a..1a2c883fa 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use AWS Bedrock for running LLM inference and safety providers: diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 8033b2086..61bc83f02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: bedrock apis: - agents @@ -22,7 +22,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock provider_type: remote::bedrock @@ -34,17 +34,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -52,7 +52,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -60,14 +60,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -78,17 +78,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -98,10 +98,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db models: - metadata: {} model_id: meta.llama3-1-8b-instruct-v1:0 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index f26f4ed9b..ecd0ac418 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Cerebras for running LLM inference providers: diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index d891502d8..f341a88c1 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 490648302..9bd8fcc7c 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: cerebras apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,10 +40,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -51,7 +51,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -59,14 +59,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -77,34 +77,34 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db models: - metadata: {} model_id: llama3.1-8b diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index 9f4fbbdda..c061d0793 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index afa8a23ce..7de8069ae 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
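
The import rewrite in this hunk (repeated across the other template modules in this patch) flattens `from llama_stack.apis.models.models import ModelType` to `from llama_stack.apis.models import ModelType`. That only resolves if the package `__init__` re-exports the symbol; a minimal sketch of the pattern, assuming `Model` and `ModelType` are defined in the inner `models` module as the old import paths suggest:

```python
# llama_stack/apis/models/__init__.py (sketch; the real file may export more)
from .models import Model, ModelType

__all__ = ["Model", "ModelType"]
```

Callers then depend only on the package surface, so the inner `models` module can be reorganized later without another repo-wide import sweep.
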
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 92497b0bf..4b7de1c0c 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ci-tests apis: - agents @@ -24,7 +24,7 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/sqlite_vec.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -37,17 +37,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -55,7 +55,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -63,14 +63,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -81,17 +81,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -101,10 +101,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db inference_store: type: 
sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 513df16c1..ff8d58a08 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Dell's distribution of Llama Stack. TGI inference via Dell's custom container diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index a7ec5f3b8..5a6f52a89 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 22cf1fd24..7f1d0a8c0 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index aeca2fc26..310f3cc20 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: dell apis: - agents @@ -36,17 +36,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -54,7 +54,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -62,14 +62,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -80,27 +80,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: 
type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 53b47da41..eb08c1d43 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Fireworks.AI for running LLM inference providers: diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5e8935361..ad29c648f 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 302328486..6265f5cae 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -46,17 +46,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -64,7 +64,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -72,14 +72,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: 
null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -90,30 +90,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -122,10 +122,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index a31ed732b..e10404e92 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: fireworks apis: - agents @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db eval: - provider_id: meta-reference provider_type: 
inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -85,30 +85,30 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/fireworks/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -117,10 +117,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db models: - metadata: {} model_id: accounts/fireworks/models/llama-v3p1-8b-instruct diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 819df22f0..7e50a899f 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Groq for running LLM inference providers: diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 4e52aa42d..9e166a288 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git 
a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 7f1912a6f..21c8f7e0f 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: groq apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,27 +84,27 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db models: - metadata: {} model_id: groq/llama3-8b-8192 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml 
index 8ede83694..9fca9ac22 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 69e037299..23887469f 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8b00f4ba5..2ae1d7685 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: 
inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 8a9cd5c49..3ec5ae9c1 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-endpoint apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic 
provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index d0752db9a..214245116 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference providers: diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index ecfe2a167..c58c0921d 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
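
A brief aside on the `generate_chunk_id` helper introduced earlier in this diff (`llama_stack/providers/utils/vector_io/chunk_utils.py`): it derives a stable UUID from an MD5 hash of its two inputs, so re-ingesting identical content always produces the same chunk ID. A self-contained demonstration (the function body is copied verbatim from the diff; the sample inputs are invented):

```python
import hashlib
import uuid


def generate_chunk_id(document_id: str, chunk_text: str) -> str:
    """Generate a unique chunk ID using a hash of document ID and chunk text."""
    hash_input = f"{document_id}:{chunk_text}".encode()
    return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))


# Deterministic: the same input pair always yields the same UUID, so repeated
# ingestion cannot mint duplicate IDs; different inputs yield different ones.
a = generate_chunk_id("doc-1", "hello world")
b = generate_chunk_id("doc-1", "hello world")
c = generate_chunk_id("doc-2", "hello world")
assert a == b and a != c
```

Note that the call site added in `make_overlapped_chunks` is `generate_chunk_id(chunk, text)`, i.e. the chunk string lands in the `document_id` slot and the full document text in `chunk_text`; the resulting ID is still deterministic, since both arguments simply feed the same hash.
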
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index fec64c1df..3871b77e7 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d4a6286d7..0a5b59400 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: hf-serverless apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 
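
The recurring change in these `run.yaml` hunks is the environment-variable placeholder syntax: `${env.VAR:default}` becomes `${env.VAR:=default}`, and the bare `${env.VAR:}` becomes `${env.VAR:+...}`, mirroring POSIX shell parameter expansion (`:=` substitutes a default when the variable is unset or empty; `:+` substitutes the given word only when it is set). A rough sketch of those semantics for illustration only; the stack's real resolver lives elsewhere and may handle edge cases differently:

```python
import os
import re

# Matches ${env.NAME}, ${env.NAME:=default}, and ${env.NAME:+alternate}.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([=+])([^}]*))?\}")


def resolve_env_placeholders(text: str) -> str:
    def substitute(match: re.Match) -> str:
        name, operator, word = match.group(1), match.group(2), match.group(3)
        value = os.environ.get(name)
        if operator == "=":   # ${env.X:=default}: default when unset or empty
            return value if value else word
        if operator == "+":   # ${env.X:+alt}: alt only when set and non-empty
            return word if value else ""
        if value is None:     # bare ${env.X}: required
            raise KeyError(f"environment variable {name} is not set")
        return value

    return _ENV_PATTERN.sub(substitute, text)


# With PGVECTOR_HOST unset, the default applies; with ENABLE_CHROMADB set,
# the conditional form yields its word.
os.environ.pop("PGVECTOR_HOST", None)
os.environ["ENABLE_CHROMADB"] = "1"
assert resolve_env_placeholders("${env.PGVECTOR_HOST:=localhost}") == "localhost"
assert resolve_env_placeholders("${env.ENABLE_CHROMADB:+chromadb}") == "chromadb"
assert resolve_env_placeholders("${env.BRAVE_SEARCH_API_KEY:+}") == ""
```

Under these shell-style rules, `${env.ENABLE_CHROMADB:+chromadb}` enables a provider only when `ENABLE_CHROMADB` is set, and `${env.BRAVE_SEARCH_API_KEY:+}` lets optional keys such as `api_key` resolve to empty rather than erroring when the variable is absent.
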
- provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml index 857e5f014..44a42594a 100644 --- a/llama_stack/templates/llama_api/build.yaml +++ b/llama_stack/templates/llama_api/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running e2e tests in CI providers: diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py index b4641b9da..7631781af 100644 --- a/llama_stack/templates/llama_api/llama_api.py +++ b/llama_stack/templates/llama_api/llama_api.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -41,7 +41,7 @@ def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: ( "llama-openai-compat", LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"), + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), ), ] inference_providers = [] @@ -85,17 +85,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 2185eb4fc..b627ed2f1 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: llama_api apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::llama-openai-compat config: openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:} + api_key: ${env.LLAMA_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -24,19 +24,19 @@ providers: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} 
provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,17 +49,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -67,7 +67,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -75,14 +75,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -93,17 +93,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/inference_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db models: - metadata: {} model_id: Llama-3.3-70B-Instruct diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 53ad411e3..2119eeddd 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Meta Reference for running LLM inference providers: diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 95d126095..57fb8f2af 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index e65445a9e..6b15a1e01 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,10 +31,10 @@ providers: model: ${env.SAFETY_MODEL} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -42,7 +42,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -55,17 +55,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -73,7 +73,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -81,14 +81,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -99,17 +99,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -119,10 +119,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8ef02f14d..1b44a0b3e 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: meta-reference-gpu apis: - agents @@ -18,10 +18,10 @@ providers: model: ${env.INFERENCE_MODEL} checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null} quantization: - type: ${env.QUANTIZATION_TYPE:bf16} - model_parallel_size: ${env.MODEL_PARALLEL_SIZE:0} - max_batch_size: ${env.MAX_BATCH_SIZE:1} - max_seq_len: ${env.MAX_SEQ_LEN:4096} + type: ${env.QUANTIZATION_TYPE:=bf16} + model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} + max_batch_size: ${env.MAX_BATCH_SIZE:=1} + max_seq_len: ${env.MAX_SEQ_LEN:=4096} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -32,7 +32,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -109,10 +109,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index 6bd8a0100..51685b2e3 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -1,4 +1,4 @@ -version: '2' 
+version: 2 distribution_spec: description: Use NVIDIA NIM for running LLM inference, evaluation and safety providers: diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index eebfa1066..875fccc9d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,14 +16,14 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -31,13 +31,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -45,30 +45,30 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: localfs 
provider_type: inline::localfs @@ -76,14 +76,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -94,10 +94,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index be0e3f6d1..4477d5244 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: nvidia apis: - agents @@ -16,9 +16,9 @@ providers: - provider_id: nvidia provider_type: remote::nvidia config: - url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True} + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} vector_io: - provider_id: faiss provider_type: inline::faiss @@ -26,13 +26,13 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia provider_type: remote::nvidia config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check} + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} + config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} agents: - provider_id: meta-reference provider_type: inline::meta-reference @@ -40,38 +40,38 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db eval: - provider_id: nvidia provider_type: remote::nvidia config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331} + evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} post_training: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} datasetio: - provider_id: nvidia provider_type: remote::nvidia config: - api_key: ${env.NVIDIA_API_KEY:} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default} - project_id: ${env.NVIDIA_PROJECT_ID:test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test} + api_key: ${env.NVIDIA_API_KEY:+} + dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} + project_id: ${env.NVIDIA_PROJECT_ID:=test-project} + datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} scoring: - provider_id: basic provider_type: inline::basic @@ -82,10 +82,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db models: - metadata: {} model_id: meta/llama3-8b-instruct diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index ebe0849f3..cbf4281a2 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) Ollama server for running LLM inference providers: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 46c4852a4..cba25296b 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 85d5c813b..5e906a12c 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +41,17 @@ providers: 
persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,15 +85,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -104,12 +105,12 @@ providers: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -120,13 +121,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - 
metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 2d10a99a4..d2b4e3978 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: ollama apis: - agents @@ -17,7 +17,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true vector_io: - provider_id: faiss provider_type: inline::faiss @@ -25,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -38,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -56,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -64,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -82,15 +83,15 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/ollama/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db post_training: - provider_id: huggingface provider_type: inline::huggingface @@ -102,12 +103,12 @@ providers: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -118,13 +119,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/open-benchmark/build.yaml b/llama_stack/templates/open-benchmark/build.yaml index 840f1e1db..5f82c5243 100644 --- a/llama_stack/templates/open-benchmark/build.yaml +++ b/llama_stack/templates/open-benchmark/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Distribution for running open benchmarks providers: diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index d944d4eff..b4cfbdb52 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -6,7 +6,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( BenchmarkInput, DatasetInput, @@ -120,17 +120,17 @@ def get_distribution_template() -> DistributionTemplate: config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 051ca6f8e..403b0fd3d 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: open-benchmark apis: - agents @@ -33,24 +33,24 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} vector_io: - provider_id: sqlite-vec provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -63,17 +63,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -81,7 +81,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -89,14 +89,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -107,17 +107,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -127,10 +127,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 46b99cb75..e2e041dbc 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Passthrough hosted llama-stack endpoint for LLM inference providers: diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py index 6a30625c5..1b94a9aae 100644 --- a/llama_stack/templates/passthrough/passthrough.py +++ b/llama_stack/templates/passthrough/passthrough.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 3168eeb9f..c5b047511 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null 
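
The same `:+` operator doubles as a feature flag for optional providers in the vector_io sections above and below: `provider_id: ${env.ENABLE_CHROMADB:+chromadb}` resolves to `chromadb` only when `ENABLE_CHROMADB` is exported and non-empty, and to an empty string otherwise, normalizing the earlier `${env.ENABLE_CHROMADB+chromadb}` spelling (no colon, which under shell semantics would fire even for an empty value). Continuing the illustrative `expand()` sketch above:

    import os

    # Hypothetical check of the :+ feature-flag behaviour, using the sketch above.
    os.environ.pop("ENABLE_CHROMADB", None)
    assert expand("${env.ENABLE_CHROMADB:+chromadb}") == ""          # unset: provider disabled

    os.environ["ENABLE_CHROMADB"] = "1"
    assert expand("${env.ENABLE_CHROMADB:+chromadb}") == "chromadb"  # set: provider enabled
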
- db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,22 +89,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -113,10 +113,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 48abf8577..896b3c91e 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: passthrough apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,22 +84,22 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db models: - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/postgres-demo/build.yaml b/llama_stack/templates/postgres-demo/build.yaml index 6416cd00f..645b59613 100644 --- a/llama_stack/templates/postgres-demo/build.yaml +++ b/llama_stack/templates/postgres-demo/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 759281567..5d42b8901 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
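
The hunk below also carries the other change repeated across the Python templates: `ModelType` is now imported from the `llama_stack.apis.models` package root rather than the nested `models` module, which assumes (reasonably, since the diff applies cleanly) that the symbol is re-exported there:

    # Before: reach into the submodule
    # from llama_stack.apis.models.models import ModelType
    # After: import from the package root (assuming a re-export in its __init__)
    from llama_stack.apis.models import ModelType
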
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -50,9 +50,9 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), ] default_tool_groups = [ @@ -114,9 +114,9 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:}", - sinks="${env.TELEMETRY_SINKS:console,otel_trace}", - otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces}", + service_name="${env.OTEL_SERVICE_NAME:+}", + sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", + otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), ) ], diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 0e0d020b2..03b7a59fb 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: postgres-demo apis: - agents @@ -13,17 +13,17 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} + url: ${env.CHROMADB_URL:+} safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -35,35 +35,35 @@ providers: config: persistence_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} responses_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:} - sinks: ${env.TELEMETRY_SINKS:console,otel_trace} - otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:http://localhost:4318/v1/traces} + service_name: ${env.OTEL_SERVICE_NAME:+} + sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} + otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: - 
provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -73,19 +73,19 @@ providers: config: {} metadata_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} - table_name: ${env.POSTGRES_TABLE_NAME:llamastack_kvstore} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} inference_store: type: postgres - host: ${env.POSTGRES_HOST:localhost} - port: ${env.POSTGRES_PORT:5432} - db: ${env.POSTGRES_DB:llamastack} - user: ${env.POSTGRES_USER:llamastack} - password: ${env.POSTGRES_PASSWORD:llamastack} + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 16fe5d4fd..0298b01c7 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) vLLM server for running LLM inference providers: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 64f71087a..b297f1489 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,16 +16,16 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -36,7 +36,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -49,10 +49,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -60,7 +60,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -68,14 +68,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -86,24 +86,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -114,13 +114,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 353b9902d..6bd332cc9 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: remote-vllm apis: - agents @@ -16,9 +16,9 @@ providers: provider_type: remote::vllm config: url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + 
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -29,7 +29,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -42,10 +42,10 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -53,7 +53,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -61,14 +61,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -79,24 +79,24 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -107,13 +107,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db inference_store: type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 2782a3ea0..94606e9d0 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index 14b1c8974..ba70f88c6 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use SambaNova for running LLM inference and safety providers: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 58d0d36e3..b96621b58 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: sambanova apis: - agents @@ -24,19 +24,19 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} safety: - provider_id: sambanova provider_type: remote::sambanova @@ -50,27 +50,27 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -81,13 +81,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db models: - metadata: {} model_id: sambanova/Meta-Llama-3.1-8B-Instruct diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 54a49423d..428577697 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -73,17 +73,17 @@ def get_distribution_template() -> DistributionTemplate: ), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 9bf4913a7..3b48dcf7a 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Quick start template for running Llama Stack with several popular providers providers: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 960e96d01..f7c53170b 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: starter apis: - agents @@ -16,74 +16,82 @@ providers: - provider_id: openai provider_type: remote::openai config: - api_key: ${env.OPENAI_API_KEY:} + api_key: ${env.OPENAI_API_KEY:+} - provider_id: fireworks provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} + api_key: ${env.FIREWORKS_API_KEY:+} - provider_id: together provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: false - provider_id: anthropic provider_type: remote::anthropic config: - api_key: ${env.ANTHROPIC_API_KEY:} + api_key: ${env.ANTHROPIC_API_KEY:+} - provider_id: gemini 
provider_type: remote::gemini config: - api_key: ${env.GEMINI_API_KEY:} + api_key: ${env.GEMINI_API_KEY:+} - provider_id: groq provider_type: remote::groq config: url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} + api_key: ${env.GROQ_API_KEY:+} - provider_id: sambanova provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY:} + api_key: ${env.SAMBANOVA_API_KEY:+} - provider_id: vllm provider_type: remote::vllm config: - url: ${env.VLLM_URL:http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:4096} - api_token: ${env.VLLM_API_TOKEN:fake} - tls_verify: ${env.VLLM_TLS_VERIFY:true} + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: sqlite-vec + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db + - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} provider_type: inline::sqlite-vec config: - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB+chromadb} + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb config: - url: ${env.CHROMADB_URL:} - - provider_id: ${env.ENABLE_PGVECTOR+pgvector} + url: ${env.CHROMADB_URL:+} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} provider_type: remote::pgvector config: - host: ${env.PGVECTOR_HOST:localhost} - port: ${env.PGVECTOR_PORT:5432} - db: ${env.PGVECTOR_DB:} - user: ${env.PGVECTOR_USER:} - password: ${env.PGVECTOR_PASSWORD:} + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:+} + user: ${env.PGVECTOR_USER:+} + password: ${env.PGVECTOR_PASSWORD:+} files: - provider_id: meta-reference-files provider_type: inline::localfs config: - storage_dir: ${env.FILES_STORAGE_DIR:~/.llama/distributions/starter/files} + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/files_metadata.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -96,17 +104,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db eval: - provider_id: meta-reference provider_type:
inline::meta-reference @@ -114,7 +122,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -122,14 +130,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -140,17 +148,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -160,10 +168,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} model_id: openai/gpt-4o @@ -530,160 +538,15 @@ models: provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/llama3.1:8b-instruct-fp16 + model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-8B-Instruct - provider_id: ollama - provider_model_id: llama3.1:8b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:8b - provider_id: ollama - provider_model_id: llama3.1:8b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-70B-Instruct - provider_id: ollama - provider_model_id: llama3.1:70b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:70b - provider_id: ollama - provider_model_id: llama3.1:70b - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: ollama - provider_model_id: llama3.1:405b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.1:405b - provider_id: ollama - provider_model_id: llama3.1:405b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b-instruct-fp16 - provider_id: 
ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-1B-Instruct - provider_id: ollama - provider_model_id: llama3.2:1b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:1b - provider_id: ollama - provider_model_id: llama3.2:1b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-3B-Instruct - provider_id: ollama - provider_model_id: llama3.2:3b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2:3b - provider_id: ollama - provider_model_id: llama3.2:3b - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:11b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:11b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:latest - provider_id: ollama - provider_model_id: llama3.2-vision:latest - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b-instruct-fp16 - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: ollama - provider_model_id: llama3.2-vision:90b-instruct-fp16 - model_type: llm -- metadata: {} - model_id: ollama/llama3.2-vision:90b - provider_id: ollama - provider_model_id: llama3.2-vision:90b - model_type: llm -- metadata: {} - model_id: ollama/llama3.3:70b - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-3.3-70B-Instruct - provider_id: ollama - provider_model_id: llama3.3:70b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:8b - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-8B - provider_id: ollama - provider_model_id: llama-guard3:8b - model_type: llm -- metadata: {} - model_id: ollama/llama-guard3:1b - provider_id: ollama - provider_model_id: llama-guard3:1b - model_type: llm -- metadata: {} - model_id: ollama/meta-llama/Llama-Guard-3-1B - provider_id: ollama - provider_model_id: llama-guard3:1b + provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm:latest + embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} + model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 384 - context_length: 512 - model_id: ollama/all-minilm - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: ollama/nomic-embed-text - provider_id: ollama - provider_model_id: nomic-embed-text + provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: embedding - metadata: {} model_id: anthropic/claude-3-5-sonnet-latest @@ -938,6 +801,11 @@ models: provider_id: sambanova provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: 
vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} + provider_id: vllm + provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} + model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 2a44a0a37..df31fed84 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -5,7 +5,7 @@ # the root directory of this source tree. -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -16,6 +16,7 @@ from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplCo from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, ) @@ -36,9 +37,6 @@ from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig -from llama_stack.providers.remote.inference.ollama.models import ( - MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES, -) from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, @@ -71,48 +69,67 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo ( "openai", OPENAI_MODEL_ENTRIES, - OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"), + OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( "fireworks", FIREWORKS_MODEL_ENTRIES, - FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:}"), + FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( "together", TOGETHER_MODEL_ENTRIES, - TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"), + TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( "ollama", - OLLAMA_MODEL_ENTRIES, - OllamaImplConfig.sample_run_config(), + [ + ProviderModelEntry( + provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}", + model_type=ModelType.llm, + ), + ProviderModelEntry( + provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", + }, + ), + ], + OllamaImplConfig.sample_run_config( + url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False + ), ), ( "anthropic", ANTHROPIC_MODEL_ENTRIES, - AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"), + AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( "gemini", GEMINI_MODEL_ENTRIES, - GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"), + GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( "groq", GROQ_MODEL_ENTRIES, - GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"), + GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( "sambanova", SAMBANOVA_MODEL_ENTRIES, - SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:}"), + SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( "vllm", - [], + [ + ProviderModelEntry( + 
provider_model_id="${env.VLLM_INFERENCE_MODEL:=__disabled__}", + model_type=ModelType.llm, + ), + ], VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:http://localhost:8000/v1}", + url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), ] @@ -153,22 +170,27 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="sqlite-vec", + provider_id="faiss", + provider_type="inline::faiss", + config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ), + Provider( + provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB+chromadb}", + provider_id="${env.ENABLE_CHROMADB:+chromadb}", provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"), + config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:+pgvector}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:}", - user="${env.PGVECTOR_USER:}", - password="${env.PGVECTOR_PASSWORD:}", + db="${env.PGVECTOR_DB:+}", + user="${env.PGVECTOR_USER:+}", + password="${env.PGVECTOR_PASSWORD:+}", ), ), ] @@ -257,7 +279,27 @@ def get_distribution_template() -> DistributionTemplate: ), "VLLM_URL": ( "http://localhost:8000/v1", - "VLLM URL", + "vLLM URL", + ), + "VLLM_INFERENCE_MODEL": ( + "", + "Optional vLLM Inference Model to register on startup", + ), + "OLLAMA_URL": ( + "http://localhost:11434", + "Ollama URL", + ), + "OLLAMA_INFERENCE_MODEL": ( + "", + "Optional Ollama Inference Model to register on startup", + ), + "OLLAMA_EMBEDDING_MODEL": ( + "", + "Optional Ollama Embedding Model to register on startup", + ), + "OLLAMA_EMBEDDING_DIMENSION": ( + "384", + "Ollama Embedding Dimension", ), }, ) diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 712d2dcb4..7badff140 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -13,7 +13,7 @@ import yaml from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( Api, BenchmarkInput, diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 361b0b680..3ac3968e8 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use (an external) TGI server for running LLM inference providers: diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index 22b7bcde6..63da62a03 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite 
namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -104,10 +104,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index dd012323c..430494121 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: tgi apis: - agents @@ -26,7 +26,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -39,17 +39,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -57,7 +57,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -65,14 +65,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -83,17 +83,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -103,10 +103,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 2c97cbf80..394cde18e 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 5ffeac873..518a843da 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use Together.AI for running 
LLM inference providers: diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index a24843416..7ae2a1d1a 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -45,17 +45,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -63,7 +63,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -71,14 +71,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -89,17 +89,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -110,13 +110,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: 
${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index c71f960bd..dc09aeac9 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: together apis: - agents @@ -16,7 +16,7 @@ providers: provider_type: remote::together config: url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} + api_key: ${env.TOGETHER_API_KEY:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -27,7 +27,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -40,17 +40,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -58,7 +58,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -66,14 +66,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -84,17 +84,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: 
remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,13 +105,13 @@ providers: - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db models: - metadata: {} model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 7761bd9fd..4c64ff3cd 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index d5ff0f1f4..147dca50d 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use a built-in vLLM engine for running LLM inference providers: diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6878c22b2..104b3a239 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: vllm-gpu apis: - agents @@ -15,12 +15,12 @@ providers: - provider_id: vllm provider_type: inline::vllm config: - tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1} - max_tokens: ${env.MAX_TOKENS:4096} - max_model_len: ${env.MAX_MODEL_LEN:4096} - max_num_seqs: ${env.MAX_NUM_SEQS:4} - enforce_eager: ${env.ENFORCE_EAGER:False} - gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.3} + tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} + max_tokens: ${env.MAX_TOKENS:=4096} + max_model_len: ${env.MAX_MODEL_LEN:=4096} + max_num_seqs: ${env.MAX_NUM_SEQS:=4} + enforce_eager: ${env.ENFORCE_EAGER:=False} + gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -31,7 +31,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -44,17 +44,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/responses_store.db + db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -62,7 +62,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -70,14 +70,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic provider_type: inline::basic @@ -88,17 +88,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -108,10 +108,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/inference_store.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 5775138b1..443fcd7a3 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
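Note on the substitution syntax migrated throughout these templates: the old ${env.VAR:default} form is replaced with bash-style parameter expansion, where ${env.VAR:=default} resolves to the environment value when VAR is set and non-empty and to the default otherwise, and ${env.VAR:+value} resolves to the literal value only when VAR is set and non-empty (so ${env.ENABLE_CHROMADB:+chromadb} leaves the provider_id empty, effectively disabling the provider, unless ENABLE_CHROMADB is exported). The sketch below is a minimal illustration of those semantics, not the project's actual resolver; the regex, the function name, and the example strings are assumptions made for the illustration.

    import os
    import re

    # Illustrative only: covers just the three forms that appear in these templates.
    _ENV_RE = re.compile(
        r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?:(?P<op>:=|:\+)(?P<value>[^}]*))?\}"
    )

    def resolve_env(template: str) -> str:
        """Expand ${env.VAR}, ${env.VAR:=default} and ${env.VAR:+value}, bash-style."""
        def substitute(match: re.Match) -> str:
            env_value = os.environ.get(match.group("name"))
            op = match.group("op")
            if op == ":=":  # default: keep the env value if set and non-empty
                return env_value if env_value else match.group("value")
            if op == ":+":  # alternate: emit the literal only if set and non-empty
                return match.group("value") if env_value else ""
            return env_value or ""  # bare reference: empty string when unset
        return _ENV_RE.sub(substitute, template)

    # With ENABLE_CHROMADB and POSTGRES_PORT unset:
    #   resolve_env("provider_id: ${env.ENABLE_CHROMADB:+chromadb}")  ->  "provider_id: "
    #   resolve_env("port: ${env.POSTGRES_PORT:=5432}")               ->  "port: 5432"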
-from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/llama_stack/templates/watsonx/build.yaml b/llama_stack/templates/watsonx/build.yaml index e68ace183..08ee2c5ce 100644 --- a/llama_stack/templates/watsonx/build.yaml +++ b/llama_stack/templates/watsonx/build.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 distribution_spec: description: Use watsonx for running LLM inference providers: diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d60a87906..678bf72ff 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -1,4 +1,4 @@ -version: '2' +version: 2 image_name: watsonx apis: - agents @@ -15,9 +15,9 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com} - api_key: ${env.WATSONX_API_KEY:} - project_id: ${env.WATSONX_PROJECT_ID:} + url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + api_key: ${env.WATSONX_API_KEY:+} + project_id: ${env.WATSONX_PROJECT_ID:+} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -28,7 +28,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -41,17 +41,17 @@ providers: persistence_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/responses_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -59,7 +59,7 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface provider_type: remote::huggingface @@ -67,14 +67,14 @@ providers: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic 
provider_type: inline::basic @@ -85,17 +85,17 @@ providers: - provider_id: braintrust provider_type: inline::braintrust config: - openai_api_key: ${env.OPENAI_API_KEY:} + openai_api_key: ${env.OPENAI_API_KEY:+} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: ${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime @@ -105,10 +105,10 @@ providers: config: {} metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db inference_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/inference_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db models: - metadata: {} model_id: meta-llama/llama-3-3-70b-instruct diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 802aaf8f1..7fa3a55e5 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.apis.models.models import ModelType +from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/pyproject.toml b/pyproject.toml index 968a3ae60..99be1a80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,8 +22,8 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", - "fire", + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.30.0,<1.0", "jinja2>=3.1.6", @@ -34,15 +34,18 @@ dependencies = [ "python-dotenv", "python-jose", "pydantic>=2", - "requests", "rich", - "setuptools", "starlette", "termcolor", "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk", # server + "opentelemetry-exporter-otlp-proto-http", # server + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store ] [project.optional-dependencies] @@ -67,7 +70,6 @@ dev = [ "types-requests", "types-setuptools", "pre-commit", - "uvicorn", "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. 
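For context on the dependency moves above: uvicorn, the OpenTelemetry SDK and OTLP/HTTP exporter, aiosqlite, and asyncpg become unconditional runtime dependencies (the server and its metadata stores need them at startup), so the hunks that follow drop them from the dev, unit, and test extras; requests leaves the core list and reappears only where it is still used directly, in the test and docs extras. A minimal sanity check, assuming a fresh environment with only the base llama-stack package installed, might look like:

    # Assumption: run inside a fresh `pip install llama-stack` environment.
    # The server-side dependencies promoted into the core list should all
    # import without installing any dev/test extras.
    import aiosqlite   # metadata store (SQLite)
    import asyncpg     # metadata store (Postgres)
    import uvicorn     # ASGI server

    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
    from opentelemetry.sdk.trace import TracerProvider

    TracerProvider()  # instantiating confirms opentelemetry-sdk is usable, not merely importable
    print("server dependencies OK; uvicorn", uvicorn.__version__)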
@@ -80,7 +82,6 @@ unit = [ "mcp", "chardet", "qdrant-client", - "opentelemetry-exporter-otlp-proto-http", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", "blobfile", @@ -96,8 +97,6 @@ test = [ "aiohttp", "torch>=2.6.0", "torchvision>=0.21.0", - "opentelemetry-sdk", - "opentelemetry-exporter-otlp-proto-http", "chardet", "pypdf", "mcp", @@ -106,6 +105,7 @@ test = [ "transformers", "sqlalchemy", "sqlalchemy[asyncio]>=2.0.41", + "requests", ] docs = [ "sphinx-autobuild", @@ -122,6 +122,7 @@ docs = [ "tomli", "linkify", "sphinxcontrib.openapi", + "requests", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] @@ -204,6 +205,9 @@ unfixable = [ "RUF001", "PLE2515", ] +"llama_stack/apis/**/__init__.py" = [ + "F403", +] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] mypy_path = ["llama_stack"] diff --git a/requirements.txt b/requirements.txt index 7e7aa38ce..2e016ef72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,8 @@ aiohttp==3.11.13 # via llama-stack aiosignal==1.3.2 # via aiohttp +aiosqlite==0.21.0 + # via llama-stack annotated-types==0.7.0 # via pydantic anyio==4.8.0 @@ -14,6 +16,8 @@ anyio==4.8.0 # llama-stack-client # openai # starlette +asyncpg==0.30.0 + # via llama-stack attrs==25.1.0 # via # aiohttp @@ -27,11 +31,18 @@ certifi==2025.1.31 charset-normalizer==3.4.1 # via requests click==8.1.8 - # via llama-stack-client + # via + # llama-stack-client + # uvicorn colorama==0.4.6 ; sys_platform == 'win32' # via # click # tqdm +deprecated==1.2.18 + # via + # opentelemetry-api + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-semantic-conventions distro==1.9.0 # via # llama-stack-client @@ -50,10 +61,13 @@ frozenlist==1.5.0 # aiosignal fsspec==2024.12.0 # via huggingface-hub +googleapis-common-protos==1.67.0 + # via opentelemetry-exporter-otlp-proto-http h11==0.16.0 # via # httpcore # llama-stack + # uvicorn hf-xet==1.1.5 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub httpcore==1.0.9 @@ -71,6 +85,8 @@ idna==3.10 # httpx # requests # yarl +importlib-metadata==8.5.0 + # via opentelemetry-api jinja2==3.1.6 # via llama-stack jiter==0.8.2 @@ -95,6 +111,25 @@ numpy==2.2.3 # via pandas openai==1.71.0 # via llama-stack +opentelemetry-api==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-http + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-otlp-proto-common==1.30.0 + # via opentelemetry-exporter-otlp-proto-http +opentelemetry-exporter-otlp-proto-http==1.30.0 + # via llama-stack +opentelemetry-proto==1.30.0 + # via + # opentelemetry-exporter-otlp-proto-common + # opentelemetry-exporter-otlp-proto-http +opentelemetry-sdk==1.30.0 + # via + # llama-stack + # opentelemetry-exporter-otlp-proto-http +opentelemetry-semantic-conventions==0.51b0 + # via opentelemetry-sdk packaging==24.2 # via huggingface-hub pandas==2.2.3 @@ -109,6 +144,10 @@ propcache==0.3.0 # via # aiohttp # yarl +protobuf==5.29.3 + # via + # googleapis-common-protos + # opentelemetry-proto pyaml==25.1.0 # via llama-stack-client pyasn1==0.4.8 @@ -148,7 +187,7 @@ regex==2024.11.6 requests==2.32.4 # via # huggingface-hub - # llama-stack + # opentelemetry-exporter-otlp-proto-http # tiktoken rich==13.9.4 # via @@ -160,8 +199,6 @@ rpds-py==0.22.3 # referencing rsa==4.9 # via python-jose -setuptools==80.8.0 - # via llama-stack six==1.17.0 # via # ecdsa @@ -189,11 +226,13 @@ tqdm==4.67.1 # openai typing-extensions==4.12.2 # via + # 
aiosqlite # anyio # fastapi # huggingface-hub # llama-stack-client # openai + # opentelemetry-sdk # pydantic # pydantic-core # referencing @@ -201,7 +240,13 @@ tzdata==2025.1 # via pandas urllib3==2.3.0 # via requests +uvicorn==0.34.0 + # via llama-stack wcwidth==0.2.13 # via prompt-toolkit +wrapt==1.17.2 + # via deprecated yarl==1.18.3 # via aiohttp +zipp==3.21.0 + # via importlib-metadata diff --git a/tests/external-provider/llama-stack-provider-ollama/run.yaml b/tests/external-provider/llama-stack-provider-ollama/run.yaml index 158f6800f..60cff7503 100644 --- a/tests/external-provider/llama-stack-provider-ollama/run.yaml +++ b/tests/external-provider/llama-stack-provider-ollama/run.yaml @@ -1,71 +1,101 @@ -version: '2' +version: 2 image_name: ollama apis: +- agents +- datasetio +- eval - inference +- safety +- scoring - telemetry - tool_runtime -- datasetio - vector_io + providers: inference: - - provider_id: custom_ollama - provider_type: remote::custom_ollama + - provider_id: ollama + provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL:=http://localhost:11434} vector_io: - provider_id: faiss provider_type: inline::faiss config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + agents_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db + service_name: "${env.OTEL_SERVICE_NAME:=\u200b}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + metadata_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db datasetio: - provider_id: huggingface provider_type: remote::huggingface config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: - kvstore: + metadata_store: type: sqlite namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} tool_runtime: - provider_id: brave-search provider_type: remote::brave-search config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} + api_key: ${env.BRAVE_SEARCH_API_KEY:+} max_results: 3 - provider_id: tavily-search provider_type: remote::tavily-search config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} + api_key: 
${env.TAVILY_SEARCH_API_KEY:+} max_results: 3 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} + api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + metadata_store: type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 26eac527b..b0b123c45 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -221,3 +221,56 @@ def test_list_response_input_items_with_limit_and_order(openai_client, client_wi assert hasattr(item, "type") assert item.type == "message" assert item.role in ["user", "assistant"] + + +@pytest.mark.skip(reason="Tool calling is not reliable.") +def test_function_call_output_response(openai_client, client_with_models, text_model_id): + """Test handling of function call outputs in responses.""" + if isinstance(client_with_models, LlamaStackAsLibraryClient): + pytest.skip("OpenAI responses are not supported when testing with library client yet.") + + client = openai_client + + # First create a response that triggers a function call + response = client.responses.create( + model=text_model_id, + input=[ + { + "role": "user", + "content": "what's the weather in tokyo? You MUST call the `get_weather` function to find out.", + } + ], + tools=[ + { + "type": "function", + "name": "get_weather", + "description": "Get the weather in a given city", + "parameters": { + "type": "object", + "properties": { + "city": {"type": "string", "description": "The city to get the weather for"}, + }, + }, + } + ], + stream=False, + ) + + # Verify we got a function call + assert response.output[0].type == "function_call" + call_id = response.output[0].call_id + + # Now send the function call output as a follow-up + response2 = client.responses.create( + model=text_model_id, + input=[{"type": "function_call_output", "call_id": call_id, "output": "sunny and warm"}], + previous_response_id=response.id, + stream=False, + ) + + # Verify the second response processed successfully + assert response2.id is not None + assert response2.output[0].type == "message" + assert ( + "sunny" in response2.output[0].content[0].text.lower() or "warm" in response2.output[0].content[0].text.lower() + ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 9cbdc8e51..0eeb68167 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -13,7 +13,7 @@ import pytest from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api -from llama_stack.apis.models.models import Model, ModelType +from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs.vector_dbs import VectorDB diff --git 
a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py index e112bb6e5..2ebcd9970 100644 --- a/tests/unit/providers/agents/meta_reference/fixtures/__init__.py +++ b/tests/unit/providers/agents/meta_reference/fixtures/__init__.py @@ -8,7 +8,7 @@ import os import yaml -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIChatCompletion, ) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index a3d798083..0d1ef8eca 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -27,8 +27,9 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageWebSearchToolCall, OpenAIResponseText, OpenAIResponseTextFormat, + WebSearchToolTypes, ) -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, OpenAIDeveloperMessageParam, @@ -161,11 +162,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon input_text = "What is the capital of Ireland?" model = "meta-llama/Llama-3.1-8B-Instruct" - mock_inference_api.openai_chat_completion.side_effect = [ - fake_stream("tool_call_completion.yaml"), - fake_stream(), - ] - openai_responses_impl.tool_groups_api.get_tool.return_value = Tool( identifier="web_search", provider_id="client", @@ -182,39 +178,50 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon ) # Execute - result = await openai_responses_impl.create_openai_response( - input=input_text, - model=model, - temperature=0.1, - tools=[ - OpenAIResponseInputToolWebSearch( - name="web_search", - ) - ], - ) + for tool_name in WebSearchToolTypes: + # Reset mock states as we loop through each tool type + mock_inference_api.openai_chat_completion.side_effect = [ + fake_stream("tool_call_completion.yaml"), + fake_stream(), + ] + openai_responses_impl.tool_groups_api.get_tool.reset_mock() + openai_responses_impl.tool_runtime_api.invoke_tool.reset_mock() + openai_responses_impl.responses_store.store_response_object.reset_mock() - # Verify - first_call = mock_inference_api.openai_chat_completion.call_args_list[0] - assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" - assert first_call.kwargs["tools"] is not None - assert first_call.kwargs["temperature"] == 0.1 + result = await openai_responses_impl.create_openai_response( + input=input_text, + model=model, + temperature=0.1, + tools=[ + OpenAIResponseInputToolWebSearch( + name=tool_name, + ) + ], + ) - second_call = mock_inference_api.openai_chat_completion.call_args_list[1] - assert second_call.kwargs["messages"][-1].content == "Dublin" - assert second_call.kwargs["temperature"] == 0.1 + # Verify + first_call = mock_inference_api.openai_chat_completion.call_args_list[0] + assert first_call.kwargs["messages"][0].content == "What is the capital of Ireland?" 
+ assert first_call.kwargs["tools"] is not None + assert first_call.kwargs["temperature"] == 0.1 - openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") - openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( - tool_name="web_search", - kwargs={"query": "What is the capital of Ireland?"}, - ) + second_call = mock_inference_api.openai_chat_completion.call_args_list[1] + assert second_call.kwargs["messages"][-1].content == "Dublin" + assert second_call.kwargs["temperature"] == 0.1 - openai_responses_impl.responses_store.store_response_object.assert_called_once() + openai_responses_impl.tool_groups_api.get_tool.assert_called_once_with("web_search") + openai_responses_impl.tool_runtime_api.invoke_tool.assert_called_once_with( + tool_name="web_search", + kwargs={"query": "What is the capital of Ireland?"}, + ) - # Check that we got the content from our mocked tool execution result - assert len(result.output) >= 1 - assert isinstance(result.output[1], OpenAIResponseMessage) - assert result.output[1].content[0].text == "Dublin" + openai_responses_impl.responses_store.store_response_object.assert_called_once() + + # Check that we got the content from our mocked tool execution result + assert len(result.output) >= 1 + assert isinstance(result.output[1], OpenAIResponseMessage) + assert result.output[1].content[0].text == "Dublin" + assert result.output[1].content[0].annotations == [] @pytest.mark.asyncio diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py index 8c74f178b..73fc32a02 100644 --- a/tests/unit/providers/nvidia/test_safety.py +++ b/tests/unit/providers/nvidia/test_safety.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from llama_stack.apis.inference.inference import CompletionMessage, UserMessage +from llama_stack.apis.inference import CompletionMessage, UserMessage from llama_stack.apis.safety import RunShieldResponse, ViolationLevel from llama_stack.apis.shields import Shield from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 4c75b8a2f..3598e4810 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -7,7 +7,7 @@ import pytest from llama_stack.apis.common.content_types import TextContentItem -from llama_stack.apis.inference.inference import ( +from llama_stack.apis.inference import ( CompletionMessage, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartTextParam, diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 67f8a138f..10fa1e075 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -35,7 +35,7 @@ import pytest -from llama_stack.apis.models.models import Model +from llama_stack.apis.models import Model from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 3bcd0613f..5eaca8a25 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -9,7 +9,7 @@ import random import numpy as np import pytest -from llama_stack.apis.vector_io import Chunk +from 
llama_stack.apis.vector_io import Chunk, ChunkMetadata EMBEDDING_DIMENSION = 384 @@ -33,6 +33,20 @@ def sample_chunks(): for j in range(k) for i in range(n) ] + sample.extend( + [ + Chunk( + content=f"Sentence {i} from document {j + k}", + chunk_metadata=ChunkMetadata( + document_id=f"document-{j + k}", + chunk_id=f"document-{j}-chunk-{i}", + source=f"example source-{j + k}-{i}", + ), + ) + for j in range(k) + for i in range(n) + ] + ) return sample diff --git a/tests/unit/providers/vector_io/test_chunk_utils.py b/tests/unit/providers/vector_io/test_chunk_utils.py new file mode 100644 index 000000000..941928b6d --- /dev/null +++ b/tests/unit/providers/vector_io/test_chunk_utils.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id + +# This test is a unit test for the chunk_utils.py helpers. This should only contain +# tests which are specific to this file. More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_chunk_utils.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +def test_generate_chunk_id(): + chunks = [ + Chunk(content="test", metadata={"document_id": "doc-1"}), + Chunk(content="test ", metadata={"document_id": "doc-1"}), + Chunk(content="test 3", metadata={"document_id": "doc-1"}), + ] + + chunk_ids = sorted([chunk.chunk_id for chunk in chunks]) + assert chunk_ids == [ + "177a1368-f6a8-0c50-6e92-18677f2c3de3", + "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", + "f68df25d-d9aa-ab4d-5684-64a233add20d", + ] + + +def test_chunk_id(): + # Test with existing chunk ID + chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"}) + assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350" + + # Test with document ID in metadata + chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"}) + assert chunk_with_doc_id.chunk_id == generate_chunk_id("doc-1", "test") + + # Test chunks with ChunkMetadata + chunk_with_metadata = Chunk( + content="test", + metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}, + chunk_metadata=ChunkMetadata(document_id="document_1"), + ) + assert chunk_with_metadata.chunk_id == "chunk-id-1" + + # Test with no ID or document ID + chunk_without_id = Chunk(content="test") + generated_id = chunk_without_id.chunk_id + assert isinstance(generated_id, str) and len(generated_id) == 36 # Should be a valid UUID + + +def test_stored_chunk_id_alias(): + # Test with existing chunk ID alias + chunk_with_alias = Chunk(content="test", metadata={"document_id": "existing-id", "chunk_id": "chunk-id-1"}) + assert chunk_with_alias.chunk_id == "chunk-id-1" + serialized_chunk = chunk_with_alias.model_dump() + assert serialized_chunk["stored_chunk_id"] == "chunk-id-1" + # showing chunk_id is not serialized (i.e., a computed field) + assert "chunk_id" not in serialized_chunk + assert chunk_with_alias.stored_chunk_id == "chunk-id-1" diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 607eccb24..6902c8850 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -81,7 +81,7 @@ __QUERY = 
"Sample query" @pytest.mark.asyncio -@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, vector_db_id, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 6424b9e86..bbac717c7 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -15,7 +15,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( SQLiteVecIndex, SQLiteVecVectorIOAdapter, _create_sqlite_connection, - generate_chunk_id, ) # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain @@ -65,6 +64,14 @@ async def test_query_chunks_vector(sqlite_vec_index, sample_chunks, sample_embed assert len(response.chunks) == 2 +@pytest.mark.xfail(reason="Chunk Metadata not yet supported for SQLite-vec", strict=True) +async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks, sample_embeddings): + await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) + query_embedding = sample_embeddings[0] + response = await sqlite_vec_index.query_vector(query_embedding, k=2, score_threshold=0.0) + assert response.chunks[-1].chunk_metadata == sample_chunks[-1].chunk_metadata + + @pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -150,21 +157,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -def test_generate_chunk_id(): - chunks = [ - Chunk(content="test", metadata={"document_id": "doc-1"}), - Chunk(content="test ", metadata={"document_id": "doc-1"}), - Chunk(content="test 3", metadata={"document_id": "doc-1"}), - ] - - chunk_ids = sorted([generate_chunk_id(chunk.metadata["document_id"], chunk.content) for chunk in chunks]) - assert chunk_ids == [ - "177a1368-f6a8-0c50-6e92-18677f2c3de3", - "bc744db3-1b25-0a9c-cdff-b6ba3df73c36", - "f68df25d-d9aa-ab4d-5684-64a233add20d", - ] - - @pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" @@ -339,7 +331,7 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks # Verify scores are in descending order assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) # Verify we get results from both the vector-similar document and keyword-matched document - doc_ids = {chunk.metadata["document_id"] for chunk in response.chunks} + doc_ids = {chunk.metadata.get("document_id") or chunk.chunk_metadata.document_id for chunk in response.chunks} assert "document-0" in doc_ids # From vector search assert "document-2" in doc_ids # From keyword search @@ -364,7 +356,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 1.0}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) # alpha=0.0 
(should behave like pure vector) response = await sqlite_vec_index.query_hybrid( @@ -389,7 +385,11 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( reranker_params={"alpha": 0.7}, ) assert len(response.chunks) > 0 # Should get at least one result - assert any("document-0" in chunk.metadata["document_id"] for chunk in response.chunks) + assert any( + "document-0" + in (chunk.metadata.get("document_id") or (chunk.chunk_metadata.document_id if chunk.chunk_metadata else "")) + for chunk in response.chunks + ) @pytest.mark.asyncio diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index b9fd8cca4..d2dd1783b 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -4,10 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock import pytest +from llama_stack.apis.vector_io import ( + Chunk, + ChunkMetadata, + QueryChunksResponse, +) from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl @@ -17,3 +22,41 @@ class TestRagQuery: rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) + + @pytest.mark.asyncio + async def test_query_chunk_metadata_handling(self): + rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) + content = "test query content" + vector_db_ids = ["db1"] + + chunk_metadata = ChunkMetadata( + document_id="doc1", + chunk_id="chunk1", + source="test_source", + metadata_token_count=5, + ) + interleaved_content = MagicMock() + chunk = Chunk( + content=interleaved_content, + metadata={ + "key1": "value1", + "token_count": 10, + "metadata_token_count": 5, + # Note this is inserted into `metadata` during MemoryToolRuntimeImpl().insert() + "document_id": "doc1", + }, + stored_chunk_id="chunk1", + chunk_metadata=chunk_metadata, + ) + + query_response = QueryChunksResponse(chunks=[chunk], scores=[1.0]) + + rag_tool.vector_io_api.query_chunks = AsyncMock(return_value=query_response) + result = await rag_tool.query(content=content, vector_db_ids=vector_db_ids) + + assert result is not None + expected_metadata_string = ( + "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}" + ) + assert expected_metadata_string in result.content[1].text + assert result.content is not None diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 7fcbbfde9..0fb7c395e 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -26,39 +26,44 @@ class TestReplaceEnvVars(unittest.TestCase): self.assertEqual(replace_env_vars("${env.TEST_VAR}"), "test_value") def test_default_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET:default}"), "default") + self.assertEqual(replace_env_vars("${env.NOT_SET:=default}"), "default") def test_default_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default}"), "test_value") + self.assertEqual(replace_env_vars("${env.TEST_VAR:=default}"), "test_value") def test_default_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR:default}"), "default") + 
self.assertEqual(replace_env_vars("${env.EMPTY_VAR:=default}"), "default") + + def test_empty_var_no_default(self): + self.assertEqual(replace_env_vars("${env.EMPTY_VAR_NO_DEFAULT:+}"), None) def test_conditional_value_when_set(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.TEST_VAR:+conditional}"), "conditional") def test_conditional_value_when_not_set(self): - self.assertEqual(replace_env_vars("${env.NOT_SET+conditional}"), "") + self.assertEqual(replace_env_vars("${env.NOT_SET:+conditional}"), None) def test_conditional_value_when_empty(self): - self.assertEqual(replace_env_vars("${env.EMPTY_VAR+conditional}"), "") + self.assertEqual(replace_env_vars("${env.EMPTY_VAR:+conditional}"), None) def test_conditional_value_with_zero(self): - self.assertEqual(replace_env_vars("${env.ZERO_VAR+conditional}"), "conditional") + self.assertEqual(replace_env_vars("${env.ZERO_VAR:+conditional}"), "conditional") def test_mixed_syntax(self): - self.assertEqual(replace_env_vars("${env.TEST_VAR:default} and ${env.NOT_SET+conditional}"), "test_value and ") self.assertEqual( - replace_env_vars("${env.NOT_SET:default} and ${env.TEST_VAR+conditional}"), "default and conditional" + replace_env_vars("${env.TEST_VAR:=default} and ${env.NOT_SET:+conditional}"), "test_value and " + ) + self.assertEqual( + replace_env_vars("${env.NOT_SET:=default} and ${env.TEST_VAR:+conditional}"), "default and conditional" ) def test_nested_structures(self): data = { - "key1": "${env.TEST_VAR:default}", - "key2": ["${env.NOT_SET:default}", "${env.TEST_VAR+conditional}"], - "key3": {"nested": "${env.NOT_SET+conditional}"}, + "key1": "${env.TEST_VAR:=default}", + "key2": ["${env.NOT_SET:=default}", "${env.TEST_VAR:+conditional}"], + "key3": {"nested": "${env.NOT_SET:+conditional}"}, } - expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": ""}} + expected = {"key1": "test_value", "key2": ["default", "conditional"], "key3": {"nested": None}} self.assertEqual(replace_env_vars(data), expected) diff --git a/uv.lock b/uv.lock index 42eece4e1..31e296642 100644 --- a/uv.lock +++ b/uv.lock @@ -158,6 +158,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, ] +[[package]] +name = "asyncpg" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/0e/f5d708add0d0b97446c402db7e8dd4c4183c13edaabe8a8500b411e7b495/asyncpg-0.30.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5e0511ad3dec5f6b4f7a9e063591d407eee66b88c14e2ea636f187da1dcfff6a", size = 674506, upload-time = "2024-10-20T00:29:27.988Z" }, + { url = "https://files.pythonhosted.org/packages/6a/a0/67ec9a75cb24a1d99f97b8437c8d56da40e6f6bd23b04e2f4ea5d5ad82ac/asyncpg-0.30.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:915aeb9f79316b43c3207363af12d0e6fd10776641a7de8a01212afd95bdf0ed", size = 645922, upload-time = "2024-10-20T00:29:29.391Z" }, + { url 
= "https://files.pythonhosted.org/packages/5c/d9/a7584f24174bd86ff1053b14bb841f9e714380c672f61c906eb01d8ec433/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c198a00cce9506fcd0bf219a799f38ac7a237745e1d27f0e1f66d3707c84a5a", size = 3079565, upload-time = "2024-10-20T00:29:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/a0/d7/a4c0f9660e333114bdb04d1a9ac70db690dd4ae003f34f691139a5cbdae3/asyncpg-0.30.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3326e6d7381799e9735ca2ec9fd7be4d5fef5dcbc3cb555d8a463d8460607956", size = 3109962, upload-time = "2024-10-20T00:29:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/199fd16b5a981b1575923cbb5d9cf916fdc936b377e0423099f209e7e73d/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:51da377487e249e35bd0859661f6ee2b81db11ad1f4fc036194bc9cb2ead5056", size = 3064791, upload-time = "2024-10-20T00:29:34.677Z" }, + { url = "https://files.pythonhosted.org/packages/77/52/0004809b3427534a0c9139c08c87b515f1c77a8376a50ae29f001e53962f/asyncpg-0.30.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc6d84136f9c4d24d358f3b02be4b6ba358abd09f80737d1ac7c444f36108454", size = 3188696, upload-time = "2024-10-20T00:29:36.389Z" }, + { url = "https://files.pythonhosted.org/packages/52/cb/fbad941cd466117be58b774a3f1cc9ecc659af625f028b163b1e646a55fe/asyncpg-0.30.0-cp311-cp311-win32.whl", hash = "sha256:574156480df14f64c2d76450a3f3aaaf26105869cad3865041156b38459e935d", size = 567358, upload-time = "2024-10-20T00:29:37.915Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/0a32307cf166d50e1ad120d9b81a33a948a1a5463ebfa5a96cc5606c0863/asyncpg-0.30.0-cp311-cp311-win_amd64.whl", hash = "sha256:3356637f0bd830407b5597317b3cb3571387ae52ddc3bca6233682be88bbbc1f", size = 629375, upload-time = "2024-10-20T00:29:39.987Z" }, + { url = "https://files.pythonhosted.org/packages/4b/64/9d3e887bb7b01535fdbc45fbd5f0a8447539833b97ee69ecdbb7a79d0cb4/asyncpg-0.30.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c902a60b52e506d38d7e80e0dd5399f657220f24635fee368117b8b5fce1142e", size = 673162, upload-time = "2024-10-20T00:29:41.88Z" }, + { url = "https://files.pythonhosted.org/packages/6e/eb/8b236663f06984f212a087b3e849731f917ab80f84450e943900e8ca4052/asyncpg-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aca1548e43bbb9f0f627a04666fedaca23db0a31a84136ad1f868cb15deb6e3a", size = 637025, upload-time = "2024-10-20T00:29:43.352Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/2dc240bb263d58786cfaa60920779af6e8d32da63ab9ffc09f8312bd7a14/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c2a2ef565400234a633da0eafdce27e843836256d40705d83ab7ec42074efb3", size = 3496243, upload-time = "2024-10-20T00:29:44.922Z" }, + { url = "https://files.pythonhosted.org/packages/f4/40/0ae9d061d278b10713ea9021ef6b703ec44698fe32178715a501ac696c6b/asyncpg-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1292b84ee06ac8a2ad8e51c7475aa309245874b61333d97411aab835c4a2f737", size = 3575059, upload-time = "2024-10-20T00:29:46.891Z" }, + { url = "https://files.pythonhosted.org/packages/c3/75/d6b895a35a2c6506952247640178e5f768eeb28b2e20299b6a6f1d743ba0/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0f5712350388d0cd0615caec629ad53c81e506b1abaaf8d14c93f54b35e3595a", size = 3473596, upload-time = "2024-10-20T00:29:49.201Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/e7/3693392d3e168ab0aebb2d361431375bd22ffc7b4a586a0fc060d519fae7/asyncpg-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:db9891e2d76e6f425746c5d2da01921e9a16b5a71a1c905b13f30e12a257c4af", size = 3641632, upload-time = "2024-10-20T00:29:50.768Z" }, + { url = "https://files.pythonhosted.org/packages/32/ea/15670cea95745bba3f0352341db55f506a820b21c619ee66b7d12ea7867d/asyncpg-0.30.0-cp312-cp312-win32.whl", hash = "sha256:68d71a1be3d83d0570049cd1654a9bdfe506e794ecc98ad0873304a9f35e411e", size = 560186, upload-time = "2024-10-20T00:29:52.394Z" }, + { url = "https://files.pythonhosted.org/packages/7e/6b/fe1fad5cee79ca5f5c27aed7bd95baee529c1bf8a387435c8ba4fe53d5c1/asyncpg-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:9a0292c6af5c500523949155ec17b7fe01a00ace33b68a476d6b5059f9630305", size = 621064, upload-time = "2024-10-20T00:29:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/3a/22/e20602e1218dc07692acf70d5b902be820168d6282e69ef0d3cb920dc36f/asyncpg-0.30.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05b185ebb8083c8568ea8a40e896d5f7af4b8554b64d7719c0eaa1eb5a5c3a70", size = 670373, upload-time = "2024-10-20T00:29:55.165Z" }, + { url = "https://files.pythonhosted.org/packages/3d/b3/0cf269a9d647852a95c06eb00b815d0b95a4eb4b55aa2d6ba680971733b9/asyncpg-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c47806b1a8cbb0a0db896f4cd34d89942effe353a5035c62734ab13b9f938da3", size = 634745, upload-time = "2024-10-20T00:29:57.14Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6d/a4f31bf358ce8491d2a31bfe0d7bcf25269e80481e49de4d8616c4295a34/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b6fde867a74e8c76c71e2f64f80c64c0f3163e687f1763cfaf21633ec24ec33", size = 3512103, upload-time = "2024-10-20T00:29:58.499Z" }, + { url = "https://files.pythonhosted.org/packages/96/19/139227a6e67f407b9c386cb594d9628c6c78c9024f26df87c912fabd4368/asyncpg-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46973045b567972128a27d40001124fbc821c87a6cade040cfcd4fa8a30bcdc4", size = 3592471, upload-time = "2024-10-20T00:30:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/67/e4/ab3ca38f628f53f0fd28d3ff20edff1c975dd1cb22482e0061916b4b9a74/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9110df111cabc2ed81aad2f35394a00cadf4f2e0635603db6ebbd0fc896f46a4", size = 3496253, upload-time = "2024-10-20T00:30:02.794Z" }, + { url = "https://files.pythonhosted.org/packages/ef/5f/0bf65511d4eeac3a1f41c54034a492515a707c6edbc642174ae79034d3ba/asyncpg-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04ff0785ae7eed6cc138e73fc67b8e51d54ee7a3ce9b63666ce55a0bf095f7ba", size = 3662720, upload-time = "2024-10-20T00:30:04.501Z" }, + { url = "https://files.pythonhosted.org/packages/e7/31/1513d5a6412b98052c3ed9158d783b1e09d0910f51fbe0e05f56cc370bc4/asyncpg-0.30.0-cp313-cp313-win32.whl", hash = "sha256:ae374585f51c2b444510cdf3595b97ece4f233fde739aa14b50e0d64e8a7a590", size = 560404, upload-time = "2024-10-20T00:30:06.537Z" }, + { url = "https://files.pythonhosted.org/packages/c8/a4/cec76b3389c4c5ff66301cd100fe88c318563ec8a520e0b2e792b5b84972/asyncpg-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:f59b430b8e27557c3fb9869222559f7417ced18688375825f8f12302c34e915e", size = 621623, upload-time = "2024-10-20T00:30:09.024Z" }, +] + [[package]] name = "attrs" version = "25.1.0" @@ -1187,6 +1219,8 @@ version = "0.2.12" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, + { name = "aiosqlite" }, + { name = "asyncpg" }, { name = "fastapi" }, { name = "fire" }, { name = "h11" }, @@ -1196,18 +1230,19 @@ dependencies = [ { name = "jsonschema" }, { name = "llama-stack-client" }, { name = "openai" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn" }, ] [package.optional-dependencies] @@ -1238,11 +1273,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1264,9 +1299,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, @@ -1282,7 +1316,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, @@ -1292,6 +1325,8 @@ unit = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "aiosqlite", specifier = ">=0.21.0" }, + { name = "asyncpg" }, { name = "fastapi", specifier = ">=0.115.0,<1.0" }, { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, @@ -1302,6 +1337,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.12" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.12" }, { name = "openai", specifier = ">=1.66" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-sdk" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1309,14 +1346,13 @@ requires-dist = [ { name = "python-dotenv" }, { name = "python-jose" }, { name = "python-multipart", specifier = ">=0.0.20" }, - { name = "requests" }, { name = "rich" }, - { name = "setuptools" }, { name = "starlette" }, { name = "streamlit", marker = "extra == 'ui'" }, { name = "streamlit-option-menu", marker = "extra == 'ui'" }, { name = "termcolor" }, { name = "tiktoken" }, + { name = "uvicorn", specifier = ">=0.34.0" }, ] provides-extras = ["ui"] @@ -1340,11 +1376,11 @@ dev = [ { name = "ruff" }, { name = "types-requests" }, { name = "types-setuptools" }, - { name = "uvicorn" }, ] docs = [ { name = "linkify" }, { name = "myst-parser" }, + { name = "requests" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, @@ -1366,9 +1402,8 @@ test = [ { name = "datasets" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, { name = "pypdf" }, + { name = "requests" }, { name = "sqlalchemy" }, { name = 
"sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "torch", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cpu" }, @@ -1383,7 +1418,6 @@ unit = [ { name = "faiss-cpu" }, { name = "mcp" }, { name = "openai" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "pypdf" }, { name = "qdrant-client" }, { name = "sqlalchemy" },