Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 19:04:19 +00:00)

Commit 604e42c56d: Merge branch 'main' into chore_build
196 changed files with 2332 additions and 1515 deletions

.github/workflows/providers-build.yml (vendored, 3 changed lines)
@@ -11,6 +11,8 @@ on:
 - 'llama_stack/distribution/*.sh'
 - '.github/workflows/providers-build.yml'
 - 'llama_stack/templates/**'
+- 'pyproject.toml'

 pull_request:
 paths:
 - 'llama_stack/cli/stack/build.py'
@@ -19,6 +21,7 @@ on:
 - 'llama_stack/distribution/*.sh'
 - '.github/workflows/providers-build.yml'
 - 'llama_stack/templates/**'
+- 'pyproject.toml'

 concurrency:
 group: ${{ github.workflow }}-${{ github.ref }}
docs/_static/llama-stack-spec.html (vendored, 359 changed lines)

@@ -7390,6 +7390,147 @@
 ],
 "title": "AgentTurnResponseTurnStartPayload"
 },
+"OpenAIResponseAnnotationCitation": {
+  "type": "object",
+  "properties": {
+    "type": { "type": "string", "const": "url_citation", "default": "url_citation" },
+    "end_index": { "type": "integer" },
+    "start_index": { "type": "integer" },
+    "title": { "type": "string" },
+    "url": { "type": "string" }
+  },
+  "additionalProperties": false,
+  "required": ["type", "end_index", "start_index", "title", "url"],
+  "title": "OpenAIResponseAnnotationCitation"
+},
+"OpenAIResponseAnnotationContainerFileCitation": {
+  "type": "object",
+  "properties": {
+    "type": { "type": "string", "const": "container_file_citation", "default": "container_file_citation" },
+    "container_id": { "type": "string" },
+    "end_index": { "type": "integer" },
+    "file_id": { "type": "string" },
+    "filename": { "type": "string" },
+    "start_index": { "type": "integer" }
+  },
+  "additionalProperties": false,
+  "required": ["type", "container_id", "end_index", "file_id", "filename", "start_index"],
+  "title": "OpenAIResponseAnnotationContainerFileCitation"
+},
+"OpenAIResponseAnnotationFileCitation": {
+  "type": "object",
+  "properties": {
+    "type": { "type": "string", "const": "file_citation", "default": "file_citation" },
+    "file_id": { "type": "string" },
+    "filename": { "type": "string" },
+    "index": { "type": "integer" }
+  },
+  "additionalProperties": false,
+  "required": ["type", "file_id", "filename", "index"],
+  "title": "OpenAIResponseAnnotationFileCitation"
+},
+"OpenAIResponseAnnotationFilePath": {
+  "type": "object",
+  "properties": {
+    "type": { "type": "string", "const": "file_path", "default": "file_path" },
+    "file_id": { "type": "string" },
+    "index": { "type": "integer" }
+  },
+  "additionalProperties": false,
+  "required": ["type", "file_id", "index"],
+  "title": "OpenAIResponseAnnotationFilePath"
+},
+"OpenAIResponseAnnotations": {
+  "oneOf": [
+    { "$ref": "#/components/schemas/OpenAIResponseAnnotationFileCitation" },
+    { "$ref": "#/components/schemas/OpenAIResponseAnnotationCitation" },
+    { "$ref": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation" },
+    { "$ref": "#/components/schemas/OpenAIResponseAnnotationFilePath" }
+  ],
+  "discriminator": {
+    "propertyName": "type",
+    "mapping": {
+      "file_citation": "#/components/schemas/OpenAIResponseAnnotationFileCitation",
+      "url_citation": "#/components/schemas/OpenAIResponseAnnotationCitation",
+      "container_file_citation": "#/components/schemas/OpenAIResponseAnnotationContainerFileCitation",
+      "file_path": "#/components/schemas/OpenAIResponseAnnotationFilePath"
+    }
+  }
+},
 "OpenAIResponseInput": {
 "oneOf": [
 {
@@ -7764,6 +7905,10 @@
 "type": "string",
 "const": "web_search"
 },
+{
+  "type": "string",
+  "const": "web_search_preview"
+},
 {
 "type": "string",
 "const": "web_search_preview_2025_03_11"
@@ -7855,12 +8000,19 @@
 "type": "string",
 "const": "output_text",
 "default": "output_text"
+},
+"annotations": {
+  "type": "array",
+  "items": {
+    "$ref": "#/components/schemas/OpenAIResponseAnnotations"
+  }
 }
 },
 "additionalProperties": false,
 "required": [
 "text",
-"type"
+"type",
+"annotations"
 ],
 "title": "OpenAIResponseOutputMessageContentOutputText"
 },
@@ -11190,6 +11342,115 @@
 ],
 "title": "InsertRequest"
 },
+"Chunk": {
+  "type": "object",
+  "properties": {
+    "content": {
+      "$ref": "#/components/schemas/InterleavedContent",
+      "description": "The content of the chunk, which can be interleaved text, images, or other types."
+    },
+    "metadata": {
+      "type": "object",
+      "additionalProperties": {
+        "oneOf": [
+          { "type": "null" },
+          { "type": "boolean" },
+          { "type": "number" },
+          { "type": "string" },
+          { "type": "array" },
+          { "type": "object" }
+        ]
+      },
+      "description": "Metadata associated with the chunk that will be used in the model context during inference."
+    },
+    "embedding": {
+      "type": "array",
+      "items": { "type": "number" },
+      "description": "Optional embedding for the chunk. If not provided, it will be computed later."
+    },
+    "stored_chunk_id": {
+      "type": "string",
+      "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
+    },
+    "chunk_metadata": {
+      "$ref": "#/components/schemas/ChunkMetadata",
+      "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
+    }
+  },
+  "additionalProperties": false,
+  "required": ["content", "metadata"],
+  "title": "Chunk",
+  "description": "A chunk of content that can be inserted into a vector database."
+},
+"ChunkMetadata": {
+  "type": "object",
+  "properties": {
+    "chunk_id": { "type": "string", "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." },
+    "document_id": { "type": "string", "description": "The ID of the document this chunk belongs to." },
+    "source": { "type": "string", "description": "The source of the content, such as a URL, file path, or other identifier." },
+    "created_timestamp": { "type": "integer", "description": "An optional timestamp indicating when the chunk was created." },
+    "updated_timestamp": { "type": "integer", "description": "An optional timestamp indicating when the chunk was last updated." },
+    "chunk_window": { "type": "string", "description": "The window of the chunk, which can be used to group related chunks together." },
+    "chunk_tokenizer": { "type": "string", "description": "The tokenizer used to create the chunk. Default is Tiktoken." },
+    "chunk_embedding_model": { "type": "string", "description": "The embedding model used to create the chunk's embedding." },
+    "chunk_embedding_dimension": { "type": "integer", "description": "The dimension of the embedding vector for the chunk." },
+    "content_token_count": { "type": "integer", "description": "The number of tokens in the content of the chunk." },
+    "metadata_token_count": { "type": "integer", "description": "The number of tokens in the metadata of the chunk." }
+  },
+  "additionalProperties": false,
+  "title": "ChunkMetadata",
+  "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference."
+},
 "InsertChunksRequest": {
 "type": "object",
 "properties": {
@@ -11200,53 +11461,7 @@
 "chunks": {
 "type": "array",
 "items": {
-"type": "object",
-"properties": {
-  "content": { "$ref": "#/components/schemas/InterleavedContent", "description": "The content of the chunk, which can be interleaved text, images, or other types." },
-  "metadata": {
-    "type": "object",
-    "additionalProperties": { "oneOf": [ { "type": "null" }, { "type": "boolean" }, { "type": "number" }, { "type": "string" }, { "type": "array" }, { "type": "object" } ] },
-    "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
-  },
-  "embedding": { "type": "array", "items": { "type": "number" }, "description": "Optional embedding for the chunk. If not provided, it will be computed later." }
-},
-"additionalProperties": false,
-"required": ["content", "metadata"],
-"title": "Chunk",
-"description": "A chunk of content that can be inserted into a vector database."
+"$ref": "#/components/schemas/Chunk"
 },
 "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later."
 },
@@ -14671,53 +14886,7 @@
 "chunks": {
 "type": "array",
 "items": {
-"type": "object",
-"properties": {
-  "content": { "$ref": "#/components/schemas/InterleavedContent", "description": "The content of the chunk, which can be interleaved text, images, or other types." },
-  "metadata": {
-    "type": "object",
-    "additionalProperties": { "oneOf": [ { "type": "null" }, { "type": "boolean" }, { "type": "number" }, { "type": "string" }, { "type": "array" }, { "type": "object" } ] },
-    "description": "Metadata associated with the chunk, such as document ID, source, or other relevant information."
-  },
-  "embedding": { "type": "array", "items": { "type": "number" }, "description": "Optional embedding for the chunk. If not provided, it will be computed later." }
-},
-"additionalProperties": false,
-"required": ["content", "metadata"],
-"title": "Chunk",
-"description": "A chunk of content that can be inserted into a vector database."
+"$ref": "#/components/schemas/Chunk"
 }
 },
 "scores": {
docs/_static/llama-stack-spec.yaml (vendored, 278 changed lines)

@@ -5263,6 +5263,106 @@ components:
 - event_type
 - turn_id
 title: AgentTurnResponseTurnStartPayload
+OpenAIResponseAnnotationCitation:
+  type: object
+  properties:
+    type: { type: string, const: url_citation, default: url_citation }
+    end_index: { type: integer }
+    start_index: { type: integer }
+    title: { type: string }
+    url: { type: string }
+  additionalProperties: false
+  required: [type, end_index, start_index, title, url]
+  title: OpenAIResponseAnnotationCitation
+"OpenAIResponseAnnotationContainerFileCitation":
+  type: object
+  properties:
+    type: { type: string, const: container_file_citation, default: container_file_citation }
+    container_id: { type: string }
+    end_index: { type: integer }
+    file_id: { type: string }
+    filename: { type: string }
+    start_index: { type: integer }
+  additionalProperties: false
+  required: [type, container_id, end_index, file_id, filename, start_index]
+  title: >-
+    OpenAIResponseAnnotationContainerFileCitation
+OpenAIResponseAnnotationFileCitation:
+  type: object
+  properties:
+    type: { type: string, const: file_citation, default: file_citation }
+    file_id: { type: string }
+    filename: { type: string }
+    index: { type: integer }
+  additionalProperties: false
+  required: [type, file_id, filename, index]
+  title: OpenAIResponseAnnotationFileCitation
+OpenAIResponseAnnotationFilePath:
+  type: object
+  properties:
+    type: { type: string, const: file_path, default: file_path }
+    file_id: { type: string }
+    index: { type: integer }
+  additionalProperties: false
+  required: [type, file_id, index]
+  title: OpenAIResponseAnnotationFilePath
+OpenAIResponseAnnotations:
+  oneOf:
+    - $ref: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+    - $ref: '#/components/schemas/OpenAIResponseAnnotationCitation'
+    - $ref: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+    - $ref: '#/components/schemas/OpenAIResponseAnnotationFilePath'
+  discriminator:
+    propertyName: type
+    mapping:
+      file_citation: '#/components/schemas/OpenAIResponseAnnotationFileCitation'
+      url_citation: '#/components/schemas/OpenAIResponseAnnotationCitation'
+      container_file_citation: '#/components/schemas/OpenAIResponseAnnotationContainerFileCitation'
+      file_path: '#/components/schemas/OpenAIResponseAnnotationFilePath'
 OpenAIResponseInput:
 oneOf:
 - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
@@ -5488,6 +5588,8 @@ components:
 oneOf:
 - type: string
 const: web_search
+- type: string
+  const: web_search_preview
 - type: string
 const: web_search_preview_2025_03_11
 default: web_search
@@ -5547,10 +5649,15 @@ components:
 type: string
 const: output_text
 default: output_text
+annotations:
+  type: array
+  items:
+    $ref: '#/components/schemas/OpenAIResponseAnnotations'
 additionalProperties: false
 required:
 - text
 - type
+- annotations
 title: >-
 OpenAIResponseOutputMessageContentOutputText
 "OpenAIResponseOutputMessageFileSearchToolCall":
@@ -7867,6 +7974,107 @@ components:
 - vector_db_id
 - chunk_size_in_tokens
 title: InsertRequest
+Chunk:
+  type: object
+  properties:
+    content:
+      $ref: '#/components/schemas/InterleavedContent'
+      description: The content of the chunk, which can be interleaved text, images, or other types.
+    metadata:
+      type: object
+      additionalProperties:
+        oneOf:
+          - type: 'null'
+          - type: boolean
+          - type: number
+          - type: string
+          - type: array
+          - type: object
+      description: Metadata associated with the chunk that will be used in the model context during inference.
+    embedding:
+      type: array
+      items: { type: number }
+      description: Optional embedding for the chunk. If not provided, it will be computed later.
+    stored_chunk_id:
+      type: string
+      description: The chunk ID that is stored in the vector database. Used for backend functionality.
+    chunk_metadata:
+      $ref: '#/components/schemas/ChunkMetadata'
+      description: Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality.
+  additionalProperties: false
+  required: [content, metadata]
+  title: Chunk
+  description: A chunk of content that can be inserted into a vector database.
+ChunkMetadata:
+  type: object
+  properties:
+    chunk_id:
+      type: string
+      description: The ID of the chunk. If not set, it will be generated based on the document ID and content.
+    document_id:
+      type: string
+      description: The ID of the document this chunk belongs to.
+    source:
+      type: string
+      description: The source of the content, such as a URL, file path, or other identifier.
+    created_timestamp:
+      type: integer
+      description: An optional timestamp indicating when the chunk was created.
+    updated_timestamp:
+      type: integer
+      description: An optional timestamp indicating when the chunk was last updated.
+    chunk_window:
+      type: string
+      description: The window of the chunk, which can be used to group related chunks together.
+    chunk_tokenizer:
+      type: string
+      description: The tokenizer used to create the chunk. Default is Tiktoken.
+    chunk_embedding_model:
+      type: string
+      description: The embedding model used to create the chunk's embedding.
+    chunk_embedding_dimension:
+      type: integer
+      description: The dimension of the embedding vector for the chunk.
+    content_token_count:
+      type: integer
+      description: The number of tokens in the content of the chunk.
+    metadata_token_count:
+      type: integer
+      description: The number of tokens in the metadata of the chunk.
+  additionalProperties: false
+  title: ChunkMetadata
+  description: >-
+    `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional
+    information about the chunk that will not be used in the context during
+    inference, but is required for backend functionality. The `ChunkMetadata` is
+    set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not
+    expected to change after. Use `Chunk.metadata` for metadata that will
+    be used in the context during inference.
 InsertChunksRequest:
 type: object
 properties:
@@ -7877,40 +8085,7 @@ components:
 chunks:
 type: array
 items:
-type: object
-properties:
-  content:
-    $ref: '#/components/schemas/InterleavedContent'
-    description: The content of the chunk, which can be interleaved text, images, or other types.
-  metadata:
-    type: object
-    additionalProperties:
-      oneOf:
-        - type: 'null'
-        - type: boolean
-        - type: number
-        - type: string
-        - type: array
-        - type: object
-    description: Metadata associated with the chunk, such as document ID, source, or other relevant information.
-  embedding:
-    type: array
-    items: { type: number }
-    description: Optional embedding for the chunk. If not provided, it will be computed later.
-additionalProperties: false
-required: [content, metadata]
-title: Chunk
-description: A chunk of content that can be inserted into a vector database.
+$ref: '#/components/schemas/Chunk'
 description: >-
 The chunks to insert. Each `Chunk` should contain content which can be
 interleaved text, images, or other types. `metadata`: `dict[str, Any]`
@@ -10231,40 +10406,7 @@ components:
 chunks:
 type: array
 items:
-type: object
-properties:
-  content:
-    $ref: '#/components/schemas/InterleavedContent'
-    description: The content of the chunk, which can be interleaved text, images, or other types.
-  metadata:
-    type: object
-    additionalProperties:
-      oneOf:
-        - type: 'null'
-        - type: boolean
-        - type: number
-        - type: string
-        - type: array
-        - type: object
-    description: Metadata associated with the chunk, such as document ID, source, or other relevant information.
-  embedding:
-    type: array
-    items: { type: number }
-    description: Optional embedding for the chunk. If not provided, it will be computed later.
+$ref: '#/components/schemas/Chunk'
 scores:
 type: array
 items:
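Note: the new `Chunk` schema above only requires `content` and `metadata`; `embedding`, `stored_chunk_id`, and `chunk_metadata` are optional. The snippet below is a minimal sketch of a payload shaped like that schema; the document ID and metadata values are invented for illustration.

```python
# Illustrative only: a payload shaped like the new `Chunk` schema.
chunk = {
    "content": "Llama Stack supports environment variable substitution in run.yaml.",
    "metadata": {"document_id": "doc-42", "source": "docs/configuration.md"},
    # "embedding" is optional; if omitted, it will be computed later by the provider.
}

# `content` and `metadata` are the only required fields per the schema above.
assert all(key in chunk for key in ("content", "metadata"))
```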
@@ -18,7 +18,7 @@ providers:
 - provider_id: ollama
 provider_type: remote::ollama
 config:
-url: ${env.OLLAMA_URL:http://localhost:11434}
+url: ${env.OLLAMA_URL:=http://localhost:11434}
 vector_io:
 - provider_id: faiss
 provider_type: inline::faiss
@@ -26,7 +26,7 @@ providers:
 kvstore:
 type: sqlite
 namespace: null
-db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
+db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
 safety:
 - provider_id: llama-guard
 provider_type: inline::llama-guard
@@ -38,7 +38,7 @@ providers:
 persistence_store:
 type: sqlite
 namespace: null
-db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
+db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
 telemetry:
 - provider_id: meta-reference
 provider_type: inline::meta-reference
@@ -46,7 +46,7 @@ providers:
 metadata_store:
 namespace: null
 type: sqlite
-db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
+db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
 model_id: ${env.INFERENCE_MODEL}
@@ -85,7 +85,7 @@ providers:
 # config is a dictionary that contains the configuration for the provider.
 # in this case, the configuration is the url of the ollama server
 config:
-url: ${env.OLLAMA_URL:http://localhost:11434}
+url: ${env.OLLAMA_URL:=http://localhost:11434}
 ```
 A few things to note:
 - A _provider instance_ is identified with an (id, type, configuration) triplet.
@@ -94,6 +94,95 @@ A few things to note:
 - The configuration dictionary is provider-specific.
 - Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
+
+### Environment Variable Substitution
+
+Llama Stack supports environment variable substitution in configuration values using the
+`${env.VARIABLE_NAME}` syntax. This allows you to externalize configuration values and provide
+different settings for different environments. The syntax is inspired by [bash parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html)
+and follows similar patterns.
+
+#### Basic Syntax
+
+The basic syntax for environment variable substitution is:
+
+```yaml
+config:
+  api_key: ${env.API_KEY}
+  url: ${env.SERVICE_URL}
+```
+
+If the environment variable is not set, the server will raise an error during startup.
+
+#### Default Values
+
+You can provide default values using the `:=` operator:
+
+```yaml
+config:
+  url: ${env.OLLAMA_URL:=http://localhost:11434}
+  port: ${env.PORT:=8321}
+  timeout: ${env.TIMEOUT:=60}
+```
+
+If the environment variable is not set, the default value is used (for example, `http://localhost:11434` for `OLLAMA_URL` above).
+Empty defaults are not allowed, so `url: ${env.OLLAMA_URL:=}` will raise an error if the environment variable is not set.
+
+#### Conditional Values
+
+You can use the `:+` operator to provide a value only when the environment variable is set:
+
+```yaml
+config:
+  # Only include this field if ENVIRONMENT is set
+  environment: ${env.ENVIRONMENT:+production}
+```
+
+If the environment variable is set, the value after `:+` is used. If it is not set, the field
+is omitted with a `None` value.
+`${env.ENVIRONMENT:+}` is also supported; it means the field is omitted when the environment
+variable is not set. This can be used to make a field optional and enable it at runtime only when desired.
+
+#### Examples
+
+Here are some common patterns:
+
+```yaml
+# Required environment variable (will error if not set)
+api_key: ${env.OPENAI_API_KEY}
+
+# Optional with default
+base_url: ${env.API_BASE_URL:=https://api.openai.com/v1}
+
+# Conditional field
+debug_mode: ${env.DEBUG:+true}
+
+# Optional field that becomes None if not set
+optional_token: ${env.OPTIONAL_TOKEN:+}
+```
+
+#### Runtime Override
+
+You can override environment variables at runtime when starting the server:
+
+```bash
+# Override specific environment variables
+llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
+
+# Or set them in your shell
+export API_KEY=sk-123
+export BASE_URL=https://custom-api.com
+llama stack run --config run.yaml
+```
+
+#### Type Safety
+
+The environment variable substitution system is type-safe:
+
+- String values remain strings
+- Empty defaults (`${env.VAR:+}`) are converted to `None` for fields that accept `str | None`
+- Numeric defaults are properly typed (e.g., `${env.PORT:=8321}` becomes an integer)
+- Boolean defaults work correctly (e.g., `${env.DEBUG:=false}` becomes a boolean)
+
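Note: the sketch below only illustrates the `:=` and `:+` semantics described above. It is not the actual Llama Stack implementation, and the variable names are made up.

```python
import os
import re

_PATTERN = re.compile(r"\$\{env\.([A-Z0-9_]+)(:=([^}]*)|:\+([^}]*))?\}")

def expand(value: str) -> str | None:
    """Illustrative expansion of ${env.VAR}, ${env.VAR:=default}, and ${env.VAR:+alt}."""
    match = _PATTERN.fullmatch(value)
    if not match:
        return value
    name, operator, default, conditional = match.groups()
    env_value = os.environ.get(name)
    if operator is None:                 # ${env.VAR}: required, error when missing
        if env_value is None:
            raise ValueError(f"environment variable {name} is not set")
        return env_value
    if operator.startswith(":="):        # ${env.VAR:=default}: fall back to the default
        return env_value if env_value else default
    # ${env.VAR:+alt}: use the alternative only when the variable is set, else None
    return (conditional or None) if env_value else None

os.environ["PORT"] = "9000"
print(expand("${env.PORT:=8321}"))        # -> 9000
print(expand("${env.MISSING:=8321}"))     # -> 8321
print(expand("${env.MISSING:+enabled}"))  # -> None
```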
 ## Resources

 Finally, let's look at the `models` section:
@@ -109,6 +198,18 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i

 What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
+
+If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below:
+
+```yaml
+models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL:__disabled__}
+    provider_id: ollama
+    provider_model_id: ${env.INFERENCE_MODEL:__disabled__}
+```
+
+The snippet above will only register this model if the environment variable `INFERENCE_MODEL` is set and non-empty. If the environment variable is not set, the model will not get registered at all.

 ## Server Configuration

 The `server` section configures the HTTP server that serves the Llama Stack APIs:
@@ -140,7 +241,7 @@ server:
 config:
 jwks:
 uri: "https://kubernetes.default.svc:8443/openid/v1/jwks"
-token: "${env.TOKEN:}"
+token: "${env.TOKEN:+}"
 key_recheck_period: 3600
 tls_cafile: "/path/to/ca.crt"
 issuer: "https://kubernetes.default.svc"
@@ -384,12 +485,12 @@ providers:
 - provider_id: vllm-0
 provider_type: remote::vllm
 config:
-url: ${env.VLLM_URL:http://localhost:8000}
+url: ${env.VLLM_URL:=http://localhost:8000}
 # this vLLM server serves the llama-guard model (e.g., llama-guard:3b)
 - provider_id: vllm-1
 provider_type: remote::vllm
 config:
-url: ${env.SAFETY_VLLM_URL:http://localhost:8001}
+url: ${env.SAFETY_VLLM_URL:=http://localhost:8001}
 ...
 models:
 - metadata: {}
@@ -15,10 +15,10 @@ data:
 - provider_id: vllm-inference
 provider_type: remote::vllm
 config:
-url: ${env.VLLM_URL:http://localhost:8000/v1}
-max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-api_token: ${env.VLLM_API_TOKEN:fake}
-tls_verify: ${env.VLLM_TLS_VERIFY:true}
+url: ${env.VLLM_URL:=http://localhost:8000/v1}
+max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+api_token: ${env.VLLM_API_TOKEN:=fake}
+tls_verify: ${env.VLLM_TLS_VERIFY:=true}
 - provider_id: vllm-safety
 provider_type: remote::vllm
 config:
@@ -30,10 +30,10 @@ data:
 provider_type: inline::sentence-transformers
 config: {}
 vector_io:
-- provider_id: ${env.ENABLE_CHROMADB+chromadb}
+- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
 provider_type: remote::chromadb
 config:
-url: ${env.CHROMADB_URL:}
+url: ${env.CHROMADB_URL:+}
 safety:
 - provider_id: llama-guard
 provider_type: inline::llama-guard
@@ -45,34 +45,34 @@ data:
 config:
 persistence_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
 user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 responses_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 telemetry:
 - provider_id: meta-reference
 provider_type: inline::meta-reference
 config:
-service_name: ${env.OTEL_SERVICE_NAME:}
+service_name: ${env.OTEL_SERVICE_NAME:+}
 sinks: ${env.TELEMETRY_SINKS:console}
 tool_runtime:
 - provider_id: brave-search
 provider_type: remote::brave-search
 config:
-api_key: ${env.BRAVE_SEARCH_API_KEY:}
+api_key: ${env.BRAVE_SEARCH_API_KEY:+}
 max_results: 3
 - provider_id: tavily-search
 provider_type: remote::tavily-search
 config:
-api_key: ${env.TAVILY_SEARCH_API_KEY:}
+api_key: ${env.TAVILY_SEARCH_API_KEY:+}
 max_results: 3
 - provider_id: rag-runtime
 provider_type: inline::rag-runtime
@@ -82,19 +82,19 @@ data:
 config: {}
 metadata_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 table_name: llamastack_kvstore
 inference_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 models:
 - metadata:
 embedding_dimension: 384
@@ -106,11 +106,11 @@ data:
 provider_id: vllm-inference
 model_type: llm
 - metadata: {}
-model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 provider_id: vllm-safety
 model_type: llm
 shields:
-- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []
@@ -12,25 +12,25 @@ providers:
 - provider_id: vllm-inference
 provider_type: remote::vllm
 config:
-url: ${env.VLLM_URL:http://localhost:8000/v1}
-max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-api_token: ${env.VLLM_API_TOKEN:fake}
-tls_verify: ${env.VLLM_TLS_VERIFY:true}
+url: ${env.VLLM_URL:=http://localhost:8000/v1}
+max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+api_token: ${env.VLLM_API_TOKEN:=fake}
+tls_verify: ${env.VLLM_TLS_VERIFY:=true}
 - provider_id: vllm-safety
 provider_type: remote::vllm
 config:
-url: ${env.VLLM_SAFETY_URL:http://localhost:8000/v1}
-max_tokens: ${env.VLLM_MAX_TOKENS:4096}
-api_token: ${env.VLLM_API_TOKEN:fake}
-tls_verify: ${env.VLLM_TLS_VERIFY:true}
+url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+api_token: ${env.VLLM_API_TOKEN:=fake}
+tls_verify: ${env.VLLM_TLS_VERIFY:=true}
 - provider_id: sentence-transformers
 provider_type: inline::sentence-transformers
 config: {}
 vector_io:
-- provider_id: ${env.ENABLE_CHROMADB+chromadb}
+- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
 provider_type: remote::chromadb
 config:
-url: ${env.CHROMADB_URL:}
+url: ${env.CHROMADB_URL:+}
 safety:
 - provider_id: llama-guard
 provider_type: inline::llama-guard
@@ -42,34 +42,34 @@ providers:
 config:
 persistence_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 responses_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 telemetry:
 - provider_id: meta-reference
 provider_type: inline::meta-reference
 config:
-service_name: ${env.OTEL_SERVICE_NAME:}
-sinks: ${env.TELEMETRY_SINKS:console}
+service_name: ${env.OTEL_SERVICE_NAME:+console}
+sinks: ${env.TELEMETRY_SINKS:+console}
 tool_runtime:
 - provider_id: brave-search
 provider_type: remote::brave-search
 config:
-api_key: ${env.BRAVE_SEARCH_API_KEY:}
+api_key: ${env.BRAVE_SEARCH_API_KEY:+}
 max_results: 3
 - provider_id: tavily-search
 provider_type: remote::tavily-search
 config:
-api_key: ${env.TAVILY_SEARCH_API_KEY:}
+api_key: ${env.TAVILY_SEARCH_API_KEY:+}
 max_results: 3
 - provider_id: rag-runtime
 provider_type: inline::rag-runtime
@@ -79,19 +79,19 @@ providers:
 config: {}
 metadata_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 table_name: llamastack_kvstore
 inference_store:
 type: postgres
-host: ${env.POSTGRES_HOST:localhost}
-port: ${env.POSTGRES_PORT:5432}
-db: ${env.POSTGRES_DB:llamastack}
-user: ${env.POSTGRES_USER:llamastack}
-password: ${env.POSTGRES_PASSWORD:llamastack}
+host: ${env.POSTGRES_HOST:=localhost}
+port: ${env.POSTGRES_PORT:=5432}
+db: ${env.POSTGRES_DB:=llamastack}
+user: ${env.POSTGRES_USER:=llamastack}
+password: ${env.POSTGRES_PASSWORD:=llamastack}
 models:
 - metadata:
 embedding_dimension: 384
@@ -103,11 +103,11 @@ models:
 provider_id: vllm-inference
 model_type: llm
 - metadata: {}
-model_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 provider_id: vllm-safety
 model_type: llm
 shields:
-- shield_id: ${env.SAFETY_MODEL:meta-llama/Llama-Guard-3-1B}
+- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
 vector_dbs: []
 datasets: []
 scoring_fns: []
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .agents import * # noqa: F401 F403
+from .agents import *
@@ -44,10 +44,55 @@ OpenAIResponseInputMessageContent = Annotated[
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


+@json_schema_type
+class OpenAIResponseAnnotationFileCitation(BaseModel):
+    type: Literal["file_citation"] = "file_citation"
+    file_id: str
+    filename: str
+    index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationCitation(BaseModel):
+    type: Literal["url_citation"] = "url_citation"
+    end_index: int
+    start_index: int
+    title: str
+    url: str
+
+
+@json_schema_type
+class OpenAIResponseAnnotationContainerFileCitation(BaseModel):
+    type: Literal["container_file_citation"] = "container_file_citation"
+    container_id: str
+    end_index: int
+    file_id: str
+    filename: str
+    start_index: int
+
+
+@json_schema_type
+class OpenAIResponseAnnotationFilePath(BaseModel):
+    type: Literal["file_path"] = "file_path"
+    file_id: str
+    index: int
+
+
+OpenAIResponseAnnotations = Annotated[
+    OpenAIResponseAnnotationFileCitation
+    | OpenAIResponseAnnotationCitation
+    | OpenAIResponseAnnotationContainerFileCitation
+    | OpenAIResponseAnnotationFilePath,
+    Field(discriminator="type"),
+]
+register_schema(OpenAIResponseAnnotations, name="OpenAIResponseAnnotations")
+
+
 @json_schema_type
 class OpenAIResponseOutputMessageContentOutputText(BaseModel):
     text: str
     type: Literal["output_text"] = "output_text"
+    annotations: list[OpenAIResponseAnnotations] = Field(default_factory=list)


 OpenAIResponseOutputMessageContent = Annotated[
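Note: a minimal, self-contained sketch of the discriminated-union pattern used by `OpenAIResponseAnnotations` above. It declares small stand-in models instead of importing the real ones, and the payload values are invented.

```python
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter

# Minimal stand-ins for two of the annotation models shown above.
class FileCitation(BaseModel):
    type: Literal["file_citation"] = "file_citation"
    file_id: str
    filename: str
    index: int

class UrlCitation(BaseModel):
    type: Literal["url_citation"] = "url_citation"
    end_index: int
    start_index: int
    title: str
    url: str

# The "type" field selects which model a payload is parsed into.
Annotation = Annotated[Union[FileCitation, UrlCitation], Field(discriminator="type")]

payload = {
    "type": "url_citation",
    "start_index": 0,
    "end_index": 12,
    "title": "Llama Stack docs",
    "url": "https://example.com/docs",
}
print(TypeAdapter(Annotation).validate_python(payload))
```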
@@ -384,9 +429,16 @@ OpenAIResponseInput = Annotated[
 register_schema(OpenAIResponseInput, name="OpenAIResponseInput")


+# Must match type Literals of OpenAIResponseInputToolWebSearch below
+WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
+
+
 @json_schema_type
 class OpenAIResponseInputToolWebSearch(BaseModel):
-    type: Literal["web_search"] | Literal["web_search_preview_2025_03_11"] = "web_search"
+    # Must match values of WebSearchToolTypes above
+    type: Literal["web_search"] | Literal["web_search_preview"] | Literal["web_search_preview_2025_03_11"] = (
+        "web_search"
+    )
     # TODO: actually use search_context_size somewhere...
     search_context_size: str | None = Field(default="medium", pattern="^low|medium|high$")
     # TODO: add user_location
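Note: a tiny illustration of the widened set of web-search tool types; the `tool` dict is a hypothetical entry, not an actual API call.

```python
WebSearchToolTypes = ["web_search", "web_search_preview", "web_search_preview_2025_03_11"]

# "web_search_preview" is now one of the accepted web-search tool types.
tool = {"type": "web_search_preview", "search_context_size": "medium"}
assert tool["type"] in WebSearchToolTypes
```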
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .batch_inference import * # noqa: F401 F403
+from .batch_inference import *
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .benchmarks import * # noqa: F401 F403
+from .benchmarks import *
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .datasetio import * # noqa: F401 F403
+from .datasetio import *
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .datasets import * # noqa: F401 F403
+from .datasets import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
+from enum import Enum, StrEnum
 from typing import Annotated, Any, Literal, Protocol

 from pydantic import BaseModel, Field
@@ -13,7 +13,7 @@ from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


-class DatasetPurpose(str, Enum):
+class DatasetPurpose(StrEnum):
     """
     Purpose of the dataset. Each purpose has a required input data schema.

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .eval import * # noqa: F401 F403
+from .eval import *
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .files import * # noqa: F401 F403
+from .files import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
+from enum import StrEnum
 from typing import Annotated, Literal, Protocol, runtime_checkable

 from fastapi import File, Form, Response, UploadFile
@@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, webmethod


 # OpenAI Files API Models
-class OpenAIFilePurpose(str, Enum):
+class OpenAIFilePurpose(StrEnum):
     """
     Valid purpose values for OpenAI Files API.
     """
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .inference import * # noqa: F401 F403
+from .inference import *
@@ -20,7 +20,7 @@ from typing_extensions import TypedDict
 from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent, InterleavedContentItem
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
+from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .inspect import * # noqa: F401 F403
+from .inspect import *
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from .models import * # noqa: F401 F403
+from .models import *
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from enum import Enum
+from enum import StrEnum
|
||||||
from typing import Any, Literal, Protocol, runtime_checkable
|
from typing import Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
@ -22,7 +22,7 @@ class CommonModelFields(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ModelType(str, Enum):
|
class ModelType(StrEnum):
|
||||||
llm = "llm"
|
llm = "llm"
|
||||||
embedding = "embedding"
|
embedding = "embedding"
|
||||||
|
|
||||||
|
|
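Note: the recurring `(str, Enum)` -> `StrEnum` change above (and in several hunks that follow) is a Python 3.11+ cleanup. A minimal sketch, not part of the diff, of the behavioral difference:

    from enum import Enum, StrEnum

    class OldModelType(str, Enum):
        llm = "llm"

    class NewModelType(StrEnum):
        llm = "llm"

    print(str(OldModelType.llm))   # "OldModelType.llm" -- class-qualified name leaks into string output
    print(str(NewModelType.llm))   # "llm" -- StrEnum keeps plain-string behavior
    assert OldModelType.llm == "llm" and NewModelType.llm == "llm"   # equality is unchanged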
@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .post_training import *  # noqa: F401 F403
+from .post_training import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .providers import *  # noqa: F401 F403
+from .providers import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .safety import *  # noqa: F401 F403
+from .safety import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring import *  # noqa: F401 F403
+from .scoring import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .scoring_functions import *  # noqa: F401 F403
+from .scoring_functions import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .shields import *  # noqa: F401 F403
+from .shields import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .synthetic_data_generation import *  # noqa: F401 F403
+from .synthetic_data_generation import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .telemetry import *  # noqa: F401 F403
+from .telemetry import *

@@ -4,5 +4,5 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .rag_tool import *  # noqa: F401 F403
-from .tools import *  # noqa: F401 F403
+from .rag_tool import *
+from .tools import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_dbs import *  # noqa: F401 F403
+from .vector_dbs import *

@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_io import *  # noqa: F401 F403
+from .vector_io import *

@@ -8,6 +8,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import uuid
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 
 from pydantic import BaseModel, Field
@@ -15,21 +16,80 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema
 
 
+@json_schema_type
+class ChunkMetadata(BaseModel):
+    """
+    `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that
+    will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata`
+    is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after.
+    Use `Chunk.metadata` for metadata that will be used in the context during inference.
+    :param chunk_id: The ID of the chunk. If not set, it will be generated based on the document ID and content.
+    :param document_id: The ID of the document this chunk belongs to.
+    :param source: The source of the content, such as a URL, file path, or other identifier.
+    :param created_timestamp: An optional timestamp indicating when the chunk was created.
+    :param updated_timestamp: An optional timestamp indicating when the chunk was last updated.
+    :param chunk_window: The window of the chunk, which can be used to group related chunks together.
+    :param chunk_tokenizer: The tokenizer used to create the chunk. Default is Tiktoken.
+    :param chunk_embedding_model: The embedding model used to create the chunk's embedding.
+    :param chunk_embedding_dimension: The dimension of the embedding vector for the chunk.
+    :param content_token_count: The number of tokens in the content of the chunk.
+    :param metadata_token_count: The number of tokens in the metadata of the chunk.
+    """
+
+    chunk_id: str | None = None
+    document_id: str | None = None
+    source: str | None = None
+    created_timestamp: int | None = None
+    updated_timestamp: int | None = None
+    chunk_window: str | None = None
+    chunk_tokenizer: str | None = None
+    chunk_embedding_model: str | None = None
+    chunk_embedding_dimension: int | None = None
+    content_token_count: int | None = None
+    metadata_token_count: int | None = None
+
+
+@json_schema_type
 class Chunk(BaseModel):
     """
     A chunk of content that can be inserted into a vector database.
     :param content: The content of the chunk, which can be interleaved text, images, or other types.
     :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
-    :param metadata: Metadata associated with the chunk, such as document ID, source, or other relevant information.
+    :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
+    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
+    :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
+    The `chunk_metadata` is required backend functionality.
     """
 
     content: InterleavedContent
     metadata: dict[str, Any] = Field(default_factory=dict)
     embedding: list[float] | None = None
+    # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
+    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
+    chunk_metadata: ChunkMetadata | None = None
+
+    model_config = {"populate_by_name": True}
+
+    def model_post_init(self, __context):
+        # Extract chunk_id from metadata if present
+        if self.metadata and "chunk_id" in self.metadata:
+            self.stored_chunk_id = self.metadata.pop("chunk_id")
+
+    @property
+    def chunk_id(self) -> str:
+        """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
+        if self.stored_chunk_id:
+            return self.stored_chunk_id
+
+        if "document_id" in self.metadata:
+            return generate_chunk_id(self.metadata["document_id"], str(self.content))
+
+        return generate_chunk_id(str(uuid.uuid4()), str(self.content))
+
+
 @json_schema_type
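Note: a minimal usage sketch, not part of the diff, of how the new `chunk_id` resolution behaves. It assumes `Chunk` and `ChunkMetadata` are re-exported from `llama_stack.apis.vector_io` and that `generate_chunk_id` is deterministic over (document_id, content); the identifiers and URL below are made up:

    from llama_stack.apis.vector_io import Chunk, ChunkMetadata

    explicit = Chunk(content="hello world", metadata={"document_id": "doc-1", "chunk_id": "chunk-42"})
    assert explicit.chunk_id == "chunk-42"        # model_post_init moved metadata["chunk_id"] to stored_chunk_id
    assert "chunk_id" not in explicit.metadata    # so it no longer leaks into the inference context

    derived = Chunk(content="hello world", metadata={"document_id": "doc-1"})
    print(derived.chunk_id)                       # deterministic ID derived from document_id + content

    backend_only = Chunk(
        content="hello world",
        chunk_metadata=ChunkMetadata(document_id="doc-1", source="https://example.com/page"),
    )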
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Self
 
 from pydantic import BaseModel, model_validator
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator
 from .conditions import parse_conditions
 
 
-class Action(str, Enum):
+class Action(StrEnum):
     CREATE = "create"
     READ = "read"
     UPDATE = "update"

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from pathlib import Path
 from typing import Annotated, Any
 
@@ -29,8 +29,8 @@ from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
 
-LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
-LLAMA_STACK_RUN_CONFIG_VERSION = "2"
+LLAMA_STACK_BUILD_CONFIG_VERSION = 2
+LLAMA_STACK_RUN_CONFIG_VERSION = 2
 
 
 RoutingKey = str | list[str]
@@ -159,7 +159,7 @@ class LoggingConfig(BaseModel):
     )
 
 
-class AuthProviderType(str, Enum):
+class AuthProviderType(StrEnum):
     """Supported authentication provider types."""
 
     OAUTH2_TOKEN = "oauth2_token"
@@ -182,7 +182,7 @@ class AuthenticationRequiredError(Exception):
     pass
 
 
-class QuotaPeriod(str, Enum):
+class QuotaPeriod(StrEnum):
     DAY = "day"
 
 
@@ -229,7 +229,7 @@ class ServerConfig(BaseModel):
 
 
 class StackRunConfig(BaseModel):
-    version: str = LLAMA_STACK_RUN_CONFIG_VERSION
+    version: int = LLAMA_STACK_RUN_CONFIG_VERSION
 
     image_name: str = Field(
         ...,
@@ -300,7 +300,7 @@ a default SQLite store will be used.""",
 
 
 class BuildConfig(BaseModel):
-    version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
+    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
 
     distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ")
     image_type: str = Field(

@@ -30,7 +30,13 @@ from llama_stack.apis.inference import (
     ListOpenAIChatCompletionResponse,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAICompletionWithInputMessages,
+    OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     Order,
     ResponseFormat,
     SamplingParams,
@@ -41,14 +47,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.log import get_logger

@@ -16,17 +16,15 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
-    VectorStoreDeleteResponse,
-    VectorStoreListResponse,
-    VectorStoreObject,
-    VectorStoreSearchResponsePage,
-)
-from llama_stack.apis.vector_io.vector_io import (
     VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
     VectorStoreFileContentsResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

@@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
 
         method = getattr(impls[api], register_method)
         for obj in objects:
+            # In complex templates, like our starter template, we may have dynamic model ids
+            # given by environment variables. This allows those environment variables to have
+            # a default value of __disabled__ to skip registration of the model if not set.
+            if (
+                hasattr(obj, "provider_model_id")
+                and obj.provider_model_id is not None
+                and "__disabled__" in obj.provider_model_id
+            ):
+                continue
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
             # we use model_dump() to find all the attrs and then getattr to get the still typed value.
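Note: a minimal sketch, not part of the diff, of what the new "__disabled__" guard does during resource registration. The model IDs and the env-var name are made up:

    class RegisteredModel:
        def __init__(self, provider_model_id):
            self.provider_model_id = provider_model_id

    # e.g. a template entry like "${env.SAFETY_MODEL:=__disabled__}" resolves to "__disabled__" when unset
    objects = [RegisteredModel("llama3.2:3b"), RegisteredModel("__disabled__"), RegisteredModel(None)]
    registered = [
        o for o in objects
        if not (o.provider_model_id is not None and "__disabled__" in o.provider_model_id)
    ]
    print([o.provider_model_id for o in registered])  # ['llama3.2:3b', None]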
@@ -118,7 +127,12 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
+        super().__init__(
+            f"Environment variable '{var_name}' not set or empty {f'at {path}' if path else ''}. "
+            f"Use ${{env.{var_name}:=default_value}} to provide a default value, "
+            f"${{env.{var_name}:+value_if_set}} to make the field conditional, "
+            f"or ensure the environment variable is set."
+        )
 
 
 def replace_env_vars(config: Any, path: str = "") -> Any:
@@ -141,25 +155,27 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
         return result
 
     elif isinstance(config, str):
-        # Updated pattern to support both default values (:) and conditional values (+)
-        pattern = r"\${env\.([A-Z0-9_]+)(?:([:\+])([^}]*))?}"
+        # Pattern supports bash-like syntax: := for default and :+ for conditional and a optional value
+        pattern = r"\${env\.([A-Z0-9_]+)(?::([=+])([^}]*))?}"
 
-        def get_env_var(match):
+        def get_env_var(match: re.Match):
             env_var = match.group(1)
-            operator = match.group(2)  # ':' for default, '+' for conditional
+            operator = match.group(2)  # '=' for default, '+' for conditional
             value_expr = match.group(3)
 
             env_value = os.environ.get(env_var)
 
-            if operator == ":":  # Default value syntax: ${env.FOO:default}
+            if operator == "=":  # Default value syntax: ${env.FOO:=default}
                 if not env_value:
-                    if value_expr is None:
+                    # value_expr returns empty string (not None) when not matched
+                    # This means ${env.FOO:=} is an error
+                    if value_expr == "":
                         raise EnvVarError(env_var, path)
                     else:
                         value = value_expr
                 else:
                     value = env_value
-            elif operator == "+":  # Conditional value syntax: ${env.FOO+value_if_set}
+            elif operator == "+":  # Conditional value syntax: ${env.FOO:+value_if_set}
                 if env_value:
                     value = value_expr
                 else:
@@ -174,13 +190,42 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             return os.path.expanduser(value)
 
         try:
-            return re.sub(pattern, get_env_var, config)
+            result = re.sub(pattern, get_env_var, config)
+            return _convert_string_to_proper_type(result)
         except EnvVarError as e:
             raise EnvVarError(e.var_name, e.path) from None
 
     return config
 
 
+def _convert_string_to_proper_type(value: str) -> Any:
+    # This might be tricky depending on what the config type is, if 'str | None' we are
+    # good, if 'str' we need to keep the empty string... 'str | None' is more common and
+    # providers config should be typed this way.
+    # TODO: we could try to load the config class and see if the config has a field with type 'str | None'
+    # and then convert the empty string to None or not
+    if value == "":
+        return None
+
+    lowered = value.lower()
+    if lowered == "true":
+        return True
+    elif lowered == "false":
+        return False
+
+    try:
+        return int(value)
+    except ValueError:
+        pass
+
+    try:
+        return float(value)
+    except ValueError:
+        pass
+
+    return value
+
+
 def validate_env_pair(env_pair: str) -> tuple[str, str]:
     """Validate and split an environment variable key-value pair."""
     try:
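Note: a minimal sketch, not part of the diff, of how the reworked substitution behaves once `replace_env_vars` (shown above) is applied to a config dict. The module path is assumed to be `llama_stack.distribution.stack`, and the variable names are made up:

    import os
    from llama_stack.distribution.stack import replace_env_vars

    os.environ["PORT"] = "8080"
    os.environ.pop("RETRIES", None)
    os.environ.pop("DEBUG", None)

    cfg = {
        "port": "${env.PORT:=5000}",     # := default, env set   -> "8080" -> 8080 (int)
        "retries": "${env.RETRIES:=3}",  # := default, env unset -> "3"    -> 3
        "debug": "${env.DEBUG:=false}",  # := default, env unset -> "false" -> False
    }
    print(replace_env_vars(cfg))  # {'port': 8080, 'retries': 3, 'debug': False}
    # Empty results become None, and ${env.VAR:+value} marks a field as conditional on VAR being set.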
@@ -25,7 +25,7 @@ class LlamaStackApi:
     def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: dict | None):
         """Run scoring on a single row"""
         if not scoring_params:
-            scoring_params = {fn_id: None for fn_id in scoring_function_ids}
+            scoring_params = dict.fromkeys(scoring_function_ids)
         return self.client.scoring.score(input_rows=[row], scoring_functions=scoring_params)
 
 

@@ -33,7 +33,7 @@ CATEGORIES = [
 ]
 
 # Initialize category levels with default level
-_category_levels: dict[str, int] = {category: DEFAULT_LOG_LEVEL for category in CATEGORIES}
+_category_levels: dict[str, int] = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
 
 
 def config_to_category_levels(category: str, level: str):

@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import base64
-from enum import Enum
+from enum import Enum, StrEnum
 from io import BytesIO
 from typing import Annotated, Any, Literal
 
@@ -171,7 +171,7 @@ class GenerationResult(BaseModel):
     ignore_token: bool
 
 
-class QuantizationMode(str, Enum):
+class QuantizationMode(StrEnum):
     none = "none"
     fp8_mixed = "fp8_mixed"
     int4_mixed = "int4_mixed"

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any, Protocol
 from urllib.parse import urlparse
 
@@ -225,7 +225,7 @@ def remote_provider_spec(
     )
 
 
-class HealthStatus(str, Enum):
+class HealthStatus(StrEnum):
     OK = "OK"
     ERROR = "Error"
     NOT_IMPLEMENTED = "Not Implemented"

@@ -42,9 +42,10 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAIResponseTextFormat,
+    WebSearchToolTypes,
 )
 from llama_stack.apis.common.content_types import TextContentItem
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     Inference,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
@@ -583,7 +584,7 @@ class OpenAIResponsesImpl:
         from llama_stack.apis.agents.openai_responses import (
             MCPListToolsTool,
         )
-        from llama_stack.apis.tools.tools import Tool
+        from llama_stack.apis.tools import Tool
 
         mcp_tool_to_server = {}
 
@@ -609,7 +610,7 @@ class OpenAIResponsesImpl:
             # TODO: Handle other tool types
             if input_tool.type == "function":
                 chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
-            elif input_tool.type == "web_search":
+            elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 tool = await self.tool_groups_api.get_tool(tool_name)
                 if not tool:

@@ -208,7 +208,7 @@ class MetaReferenceEvalImpl(
                 for scoring_fn_id in scoring_functions
             }
         else:
            scoring_functions_dict = dict.fromkeys(scoring_functions)
 
         score_response = await self.scoring_api.score(
             input_rows=score_input_rows, scoring_functions=scoring_functions_dict

@@ -23,7 +23,7 @@ class LocalfsFilesImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "storage_dir": "${env.FILES_STORAGE_DIR:" + __distro_dir__ + "/files}",
+            "storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
             "metadata_store": SqliteSqlStoreConfig.sample_run_config(
                 __distro_dir__=__distro_dir__,
                 db_name="files_metadata.db",

@@ -49,11 +49,11 @@ class MetaReferenceInferenceConfig(BaseModel):
     def sample_run_config(
         cls,
         model: str = "Llama3.2-3B-Instruct",
-        checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
-        quantization_type: str = "${env.QUANTIZATION_TYPE:bf16}",
-        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:0}",
-        max_batch_size: str = "${env.MAX_BATCH_SIZE:1}",
-        max_seq_len: str = "${env.MAX_SEQ_LEN:4096}",
+        checkpoint_dir: str = "${env.CHECKPOINT_DIR:=null}",
+        quantization_type: str = "${env.QUANTIZATION_TYPE:=bf16}",
+        model_parallel_size: str = "${env.MODEL_PARALLEL_SIZE:=0}",
+        max_batch_size: str = "${env.MAX_BATCH_SIZE:=1}",
+        max_seq_len: str = "${env.MAX_SEQ_LEN:=4096}",
         **kwargs,
     ) -> dict[str, Any]:
         return {

@@ -44,10 +44,10 @@ class VLLMConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:1}",
-            "max_tokens": "${env.MAX_TOKENS:4096}",
-            "max_model_len": "${env.MAX_MODEL_LEN:4096}",
-            "max_num_seqs": "${env.MAX_NUM_SEQS:4}",
-            "enforce_eager": "${env.ENFORCE_EAGER:False}",
-            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:0.3}",
+            "tensor_parallel_size": "${env.TENSOR_PARALLEL_SIZE:=1}",
+            "max_tokens": "${env.MAX_TOKENS:=4096}",
+            "max_model_len": "${env.MAX_MODEL_LEN:=4096}",
+            "max_num_seqs": "${env.MAX_NUM_SEQS:=4}",
+            "enforce_eager": "${env.ENFORCE_EAGER:=False}",
+            "gpu_memory_utilization": "${env.GPU_MEMORY_UTILIZATION:=0.3}",
         }
 

@@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "openai_api_key": "${env.OPENAI_API_KEY:}",
+            "openai_api_key": "${env.OPENAI_API_KEY:+}",
         }

@@ -7,7 +7,7 @@ from typing import Any
 
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import (
     ScoreBatchResponse,
     ScoreResponse,

@@ -6,7 +6,7 @@
 import re
 from typing import Any
 
-from llama_stack.apis.inference.inference import Inference, UserMessage
+from llama_stack.apis.inference import Inference, UserMessage
 from llama_stack.apis.scoring import ScoringResultRow
 from llama_stack.apis.scoring_functions import ScoringFnParams
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from enum import Enum
+from enum import StrEnum
 from typing import Any
 
 from pydantic import BaseModel, Field, field_validator
@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, field_validator
 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
 
 
-class TelemetrySink(str, Enum):
+class TelemetrySink(StrEnum):
     OTEL_TRACE = "otel_trace"
     OTEL_METRIC = "otel_metric"
     SQLITE = "sqlite"
@@ -20,12 +20,12 @@ class TelemetrySink(str, Enum):
 
 
 class TelemetryConfig(BaseModel):
-    otel_trace_endpoint: str = Field(
-        default="http://localhost:4318/v1/traces",
+    otel_trace_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for traces",
     )
-    otel_metric_endpoint: str = Field(
-        default="http://localhost:4318/v1/metrics",
+    otel_metric_endpoint: str | None = Field(
+        default=None,
         description="The OpenTelemetry collector endpoint URL for metrics",
     )
     service_name: str = Field(
@@ -52,7 +52,7 @@ class TelemetryConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> dict[str, Any]:
         return {
-            "service_name": "${env.OTEL_SERVICE_NAME:\u200b}",
-            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
-            "sqlite_db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
+            "sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
+            "sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }

@@ -87,12 +87,16 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
             trace.set_tracer_provider(provider)
             _TRACER_PROVIDER = provider
             if TelemetrySink.OTEL_TRACE in self.config.sinks:
+                if self.config.otel_trace_endpoint is None:
+                    raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
                 span_exporter = OTLPSpanExporter(
                     endpoint=self.config.otel_trace_endpoint,
                 )
                 span_processor = BatchSpanProcessor(span_exporter)
                 trace.get_tracer_provider().add_span_processor(span_processor)
             if TelemetrySink.OTEL_METRIC in self.config.sinks:
+                if self.config.otel_metric_endpoint is None:
+                    raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
                 metric_reader = PeriodicExportingMetricReader(
                     OTLPMetricExporter(
                         endpoint=self.config.otel_metric_endpoint,
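Note: a minimal sketch, not part of the diff, of what the now-optional OTLP endpoints mean for a run config. The field names come from the hunks above; the import path and the CONSOLE sink member are assumptions:

    from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig, TelemetrySink

    cfg = TelemetryConfig(service_name="my-stack", sinks=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE])
    assert cfg.otel_trace_endpoint is None and cfg.otel_metric_endpoint is None

    # Enabling an OTLP sink without its endpoint now fails fast in TelemetryAdapter.__init__:
    #   ValueError: otel_trace_endpoint is required when OTEL_TRACE is enabled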
@@ -81,6 +81,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
         chunks = []
         for doc in documents:
             content = await content_from_doc(doc)
+            # TODO: we should add enrichment here as URLs won't be added to the metadata by default
             chunks.extend(
                 make_overlapped_chunks(
                     doc.document_id,
@@ -157,8 +158,24 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
                 )
                 break
 
-            metadata_subset = {k: v for k, v in metadata.items() if k not in ["token_count", "metadata_token_count"]}
-            text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_subset)
+            # Add useful keys from chunk_metadata to metadata and remove some from metadata
+            chunk_metadata_keys_to_include_from_context = [
+                "chunk_id",
+                "document_id",
+                "source",
+            ]
+            metadata_keys_to_exclude_from_context = [
+                "token_count",
+                "metadata_token_count",
+            ]
+            metadata_for_context = {}
+            for k in chunk_metadata_keys_to_include_from_context:
+                metadata_for_context[k] = getattr(chunk.chunk_metadata, k)
+            for k in metadata:
+                if k not in metadata_keys_to_exclude_from_context:
+                    metadata_for_context[k] = metadata[k]
+
+            text_content = query_config.chunk_template.format(index=i + 1, chunk=chunk, metadata=metadata_for_context)
             picked.append(TextContentItem(text=text_content))
 
         picked.append(TextContentItem(text="END of knowledge_search tool results.\n"))
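Note: a minimal sketch, not part of the diff, of what the knowledge_search context now carries per chunk: the selected ChunkMetadata keys plus the inference-facing metadata, minus the token-count bookkeeping. The values and the chunk_template string below are made up:

    chunk_metadata = {"chunk_id": "c-1", "document_id": "doc-1", "source": "https://example.com/guide"}
    metadata = {"author": "docs-team", "token_count": 57, "metadata_token_count": 9}

    metadata_for_context = dict(chunk_metadata)
    metadata_for_context.update(
        {k: v for k, v in metadata.items() if k not in ("token_count", "metadata_token_count")}
    )

    chunk_template = "Result {index}\nContent: {chunk}\nMetadata: {metadata}\n"
    print(chunk_template.format(index=1, chunk="...chunk text...", metadata=metadata_for_context))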
@@ -16,8 +16,7 @@ import numpy as np
 from numpy.typing import NDArray
 
 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.inference import Inference, InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -19,5 +19,5 @@ class QdrantVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
+            "path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
         }

@@ -15,5 +15,5 @@ class SQLiteVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + "sqlite_vec.db",
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
         }

@@ -5,20 +5,18 @@
 # the root directory of this source tree.
 
 import asyncio
-import hashlib
 import json
 import logging
 import sqlite3
 import struct
-import uuid
 from typing import Any
 
 import numpy as np
 import sqlite_vec
 from numpy.typing import NDArray
 
-from llama_stack.apis.files.files import Files
-from llama_stack.apis.inference.inference import Inference
+from llama_stack.apis.files import Files
+from llama_stack.apis.inference import Inference
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -66,7 +64,7 @@ def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
     score_range = max_score - min_score
     if score_range > 0:
         return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()}
-    return {doc_id: 1.0 for doc_id in scores}
+    return dict.fromkeys(scores, 1.0)
 
 
 def _weighted_rerank(
@@ -201,10 +199,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                        batch_embeddings = embeddings[i : i + batch_size]
 
                        # Insert metadata
-                       metadata_data = [
-                           (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json())
-                           for chunk in batch_chunks
-                       ]
+                       metadata_data = [(chunk.chunk_id, chunk.model_dump_json()) for chunk in batch_chunks]
                        cur.executemany(
                            f"""
                            INSERT INTO {self.metadata_table} (id, chunk)
@@ -218,7 +213,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                        embedding_data = [
                            (
                                (
-                                   generate_chunk_id(chunk.metadata["document_id"], chunk.content),
+                                   chunk.chunk_id,
                                    serialize_vector(emb.tolist()),
                                )
                            )
@@ -230,10 +225,7 @@ class SQLiteVecIndex(EmbeddingIndex):
                        )
 
                        # Insert FTS content
-                       fts_data = [
-                           (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.content)
-                           for chunk in batch_chunks
-                       ]
+                       fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
                        # DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
                        cur.executemany(
                            f"DELETE FROM {self.fts_table} WHERE id = ?;",
@@ -381,13 +373,12 @@ class SQLiteVecIndex(EmbeddingIndex):
         vector_response = await self.query_vector(embedding, k, score_threshold)
         keyword_response = await self.query_keyword(query_string, k, score_threshold)
 
-        # Convert responses to score dictionaries using generate_chunk_id
+        # Convert responses to score dictionaries using chunk_id
         vector_scores = {
-            generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score
-            for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
+            chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False)
         }
         keyword_scores = {
-            generate_chunk_id(chunk.metadata["document_id"], str(chunk.content)): score
+            chunk.chunk_id: score
            for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False)
         }
 
@@ -408,13 +399,7 @@ class SQLiteVecIndex(EmbeddingIndex):
         filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold]
 
         # Create a map of chunk_id to chunk for both responses
-        chunk_map = {}
-        for c in vector_response.chunks:
-            chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content))
-            chunk_map[chunk_id] = c
-        for c in keyword_response.chunks:
-            chunk_id = generate_chunk_id(c.metadata["document_id"], str(c.content))
-            chunk_map[chunk_id] = c
+        chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks}
 
         # Use the map to look up chunks by their IDs
         chunks = []
@@ -757,9 +742,3 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
         if vector_db_id not in self.cache:
             raise ValueError(f"Vector DB {vector_db_id} not found")
         return await self.cache[vector_db_id].query_chunks(query, params)
-
-
-def generate_chunk_id(document_id: str, chunk_text: str) -> str:
-    """Generate a unique chunk ID using a hash of document ID and chunk text."""
-    hash_input = f"{document_id}:{chunk_text}".encode()
-    return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
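Note: the local helper removed above (md5 of "document_id:chunk_text" rendered as a UUID) is what the surrounding hunks now reach through `chunk.chunk_id`, via the relocated `llama_stack.providers.utils.vector_io.chunk_utils.generate_chunk_id`. A minimal sketch, assuming the relocated helper keeps the same construction:

    import hashlib
    import uuid

    def generate_chunk_id(document_id: str, chunk_text: str) -> str:
        hash_input = f"{document_id}:{chunk_text}".encode()
        return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))

    a = generate_chunk_id("doc-1", "hello world")
    b = generate_chunk_id("doc-1", "hello world")
    assert a == b  # deterministic, so vector and keyword results for the same chunk share one ID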
|
@ -70,7 +70,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter=AdapterSpec(
|
adapter=AdapterSpec(
|
||||||
adapter_type="ollama",
|
adapter_type="ollama",
|
||||||
pip_packages=["ollama", "aiohttp"],
|
pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
|
||||||
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
|
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
|
||||||
module="llama_stack.providers.remote.inference.ollama",
|
module="llama_stack.providers.remote.inference.ollama",
|
||||||
),
|
),
|
||||||
|
|
|
@ -67,7 +67,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
api=Api.safety,
|
api=Api.safety,
|
||||||
adapter=AdapterSpec(
|
adapter=AdapterSpec(
|
||||||
adapter_type="sambanova",
|
adapter_type="sambanova",
|
||||||
pip_packages=["litellm"],
|
pip_packages=["litellm", "requests"],
|
||||||
module="llama_stack.providers.remote.safety.sambanova",
|
module="llama_stack.providers.remote.safety.sambanova",
|
||||||
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
|
config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig",
|
||||||
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
|
provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator",
|
||||||
|
|
|
@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
|
||||||
InlineProviderSpec(
|
InlineProviderSpec(
|
||||||
api=Api.scoring,
|
api=Api.scoring,
|
||||||
provider_type="inline::basic",
|
provider_type="inline::basic",
|
||||||
pip_packages=[],
|
pip_packages=["requests"],
|
||||||
module="llama_stack.providers.inline.scoring.basic",
|
module="llama_stack.providers.inline.scoring.basic",
|
||||||
config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
|
config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
|
||||||
api_dependencies=[
|
api_dependencies=[
|
||||||
|
|
|
@ -54,8 +54,8 @@ class NvidiaDatasetIOConfig(BaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"api_key": "${env.NVIDIA_API_KEY:}",
|
"api_key": "${env.NVIDIA_API_KEY:+}",
|
||||||
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
|
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
|
||||||
"project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
|
"project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
|
||||||
"datasets_url": "${env.NVIDIA_DATASETS_URL:http://nemo.test}",
|
"datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,7 +66,7 @@ class NvidiaDatasetIOAdapter:
|
||||||
Returns:
|
Returns:
|
||||||
Dataset
|
Dataset
|
||||||
"""
|
"""
|
||||||
## add warnings for unsupported params
|
# add warnings for unsupported params
|
||||||
request_body = {
|
request_body = {
|
||||||
"name": dataset_def.identifier,
|
"name": dataset_def.identifier,
|
||||||
"namespace": self.config.dataset_namespace,
|
"namespace": self.config.dataset_namespace,
|
||||||
|
|
|
@ -25,5 +25,5 @@ class NVIDIAEvalConfig(BaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
|
"evaluator_url": "${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}",
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.models.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.providers.utils.inference.model_registry import (
|
from llama_stack.providers.utils.inference.model_registry import (
|
||||||
ProviderModelEntry,
|
ProviderModelEntry,
|
||||||
)
|
)
|
||||||
|
|
|
@ -24,6 +24,12 @@ from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
OpenAIChatCompletion,
|
||||||
|
OpenAIChatCompletionChunk,
|
||||||
|
OpenAICompletion,
|
||||||
|
OpenAIEmbeddingsResponse,
|
||||||
|
OpenAIMessageParam,
|
||||||
|
OpenAIResponseFormatParam,
|
||||||
ResponseFormat,
|
ResponseFormat,
|
||||||
ResponseFormatType,
|
ResponseFormatType,
|
||||||
SamplingParams,
|
SamplingParams,
|
||||||
|
@ -33,14 +39,6 @@ from llama_stack.apis.inference import (
|
||||||
ToolDefinition,
|
ToolDefinition,
|
||||||
ToolPromptFormat,
|
ToolPromptFormat,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.inference.inference import (
|
|
||||||
OpenAIChatCompletion,
|
|
||||||
OpenAIChatCompletionChunk,
|
|
||||||
OpenAICompletion,
|
|
||||||
OpenAIEmbeddingsResponse,
|
|
||||||
OpenAIMessageParam,
|
|
||||||
OpenAIResponseFormatParam,
|
|
||||||
)
|
|
||||||
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.model_registry import (
|
from llama_stack.providers.utils.inference.model_registry import (
|
||||||
|
|
|
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )

@@ -9,7 +9,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChoiceDelta,

@@ -55,7 +55,7 @@ class NVIDIAConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}",
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:True}",
+            "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
         }

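Note: the pattern running through these config hunks is a switch from `${env.VAR:value}` to the bash-style `${env.VAR:=value}` for defaults, and from `${env.VAR:}` to `${env.VAR:+}` for fields that should only be populated when the variable is actually set. The exact substitution semantics live in the stack's environment-resolution code rather than in this diff, so the sketch below is illustrative only; the class and variable names are hypothetical.

    from typing import Any

    from pydantic import BaseModel


    class ExampleProviderConfig(BaseModel):
        """Hypothetical provider config using the new placeholder style."""

        url: str = "http://localhost:8000"
        api_key: str | None = None

        @classmethod
        def sample_run_config(cls, **kwargs) -> dict[str, Any]:
            return {
                # ":=" supplies a default when EXAMPLE_BASE_URL is unset
                "url": "${env.EXAMPLE_BASE_URL:=http://localhost:8000}",
                # ":+" leaves the field empty unless EXAMPLE_API_KEY is set
                "api_key": "${env.EXAMPLE_API_KEY:+}",
            }
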
@@ -29,20 +29,18 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
     ToolChoice,
     ToolConfig,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import ToolDefinition, ToolPromptFormat
 from llama_stack.providers.utils.inference import (

@@ -10,6 +10,6 @@ from .config import OllamaImplConfig
 async def get_adapter_impl(config: OllamaImplConfig, _deps):
     from .ollama import OllamaInferenceAdapter

-    impl = OllamaInferenceAdapter(config.url)
+    impl = OllamaInferenceAdapter(config)
     await impl.initialize()
     return impl

@@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"

 class OllamaImplConfig(BaseModel):
     url: str = DEFAULT_OLLAMA_URL
+    raise_on_connect_error: bool = True

     @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]:
-        return {"url": url}
+    def sample_run_config(
+        cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "raise_on_connect_error": raise_on_connect_error,
+        }

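Note: a quick sketch of how the reworked OllamaImplConfig can be constructed, based only on the fields shown in the hunk above; treat it as illustrative rather than canonical usage.

    from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig

    # Explicit construction with the new flag.
    config = OllamaImplConfig(url="http://localhost:11434", raise_on_connect_error=False)

    # The sample run config now carries both fields.
    sample = OllamaImplConfig.sample_run_config()
    # expected: {'url': '${env.OLLAMA_URL:=http://localhost:11434}', 'raise_on_connect_error': True}
    print(config.raise_on_connect_error, sample)
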
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -9,7 +9,6 @@ import uuid
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

-import httpx
 from ollama import AsyncClient  # type: ignore[attr-defined]
 from openai import AsyncOpenAI

@@ -33,6 +32,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -41,15 +47,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (

@@ -57,6 +54,7 @@ from llama_stack.providers.datatypes import (
     HealthStatus,
     ModelsProtocolPrivate,
 )
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )

@@ -90,9 +88,10 @@ class OllamaInferenceAdapter(
     InferenceProvider,
     ModelsProtocolPrivate,
 ):
-    def __init__(self, url: str) -> None:
+    def __init__(self, config: OllamaImplConfig) -> None:
         self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
-        self.url = url
+        self.url = config.url
+        self.raise_on_connect_error = config.raise_on_connect_error

     @property
     def client(self) -> AsyncClient:

@@ -103,8 +102,13 @@ class OllamaInferenceAdapter(
         return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama")

     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.url}`...")
-        await self.health()
+        logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
+        health_response = await self.health()
+        if health_response["status"] == HealthStatus.ERROR:
+            if self.raise_on_connect_error:
+                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            else:
+                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")

     async def health(self) -> HealthResponse:
         """

@@ -117,10 +121,8 @@ class OllamaInferenceAdapter(
         try:
             await self.client.ps()
             return HealthResponse(status=HealthStatus.OK)
-        except httpx.ConnectError as e:
-            raise RuntimeError(
-                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
-            ) from e
+        except Exception as e:
+            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

     async def shutdown(self) -> None:
         pass

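Note: taken together, the hunks above change startup behavior: health() now returns a HealthResponse instead of raising on a connection failure, and initialize() only raises when raise_on_connect_error is true. A minimal sketch of that control flow, assuming the adapter lives at the module path implied by the imports above:

    import asyncio

    from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
    from llama_stack.providers.remote.inference.ollama.ollama import OllamaInferenceAdapter


    async def main() -> None:
        # With raise_on_connect_error=False, a missing Ollama server only logs a
        # warning during initialize() instead of raising RuntimeError.
        adapter = OllamaInferenceAdapter(OllamaImplConfig(raise_on_connect_error=False))
        await adapter.initialize()

        # health() reports status rather than raising; callers inspect "status".
        print(await adapter.health())


    asyncio.run(main())
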
@@ -6,7 +6,7 @@

 from dataclasses import dataclass

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,
 )

@@ -10,7 +10,7 @@ from typing import Any

 from openai import AsyncOpenAI

-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAICompletion,

@@ -19,7 +19,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -28,13 +33,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model
 from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:}",
-            "api_token": "${env.RUNPOD_API_TOKEN:}",
+            "url": "${env.RUNPOD_URL:+}",
+            "api_token": "${env.RUNPOD_API_TOKEN:+}",
         }

@@ -8,7 +8,7 @@ from collections.abc import AsyncGenerator
 from openai import OpenAI

 from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.apis.inference.inference import OpenAIEmbeddingsResponse
+from llama_stack.apis.inference import OpenAIEmbeddingsResponse

 # from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
             "url": "https://api.together.xyz/v1",
-            "api_key": "${env.TOGETHER_API_KEY:}",
+            "api_key": "${env.TOGETHER_API_KEY:+}",
         }

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ProviderModelEntry,

@@ -23,7 +23,12 @@ from llama_stack.apis.inference import (
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     ResponseFormatType,
     SamplingParams,
@@ -33,13 +38,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -34,9 +34,6 @@ class VLLMInferenceAdapterConfig(BaseModel):
     @classmethod
     def validate_tls_verify(cls, v):
         if isinstance(v, str):
-            # Check if it's a boolean string
-            if v.lower() in ("true", "false"):
-                return v.lower() == "true"
             # Otherwise, treat it as a cert path
             cert_path = Path(v).expanduser().resolve()
             if not cert_path.exists():
@@ -54,7 +51,7 @@ class VLLMInferenceAdapterConfig(BaseModel):
     ):
         return {
             "url": url,
-            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
-            "api_token": "${env.VLLM_API_TOKEN:fake}",
-            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
+            "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
+            "api_token": "${env.VLLM_API_TOKEN:=fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
         }

@@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

 import httpx
-from openai import AsyncOpenAI
+from openai import APIConnectionError, AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )

@@ -38,9 +38,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAICompletion,
     OpenAIEmbeddingData,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -49,12 +53,6 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
 from llama_stack.models.llama.sku_list import all_registered_models

@@ -461,7 +459,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             model = await self.register_helper.register_model(model)
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
-        res = await client.models.list()
+        try:
+            res = await client.models.list()
+        except APIConnectionError as e:
+            raise ValueError(
+                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
+            ) from e
         available_models = [m.id async for m in res]
         if model.provider_resource_id not in available_models:
             raise ValueError(

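Note: the new try/except turns a low-level APIConnectionError into a ValueError that names the configured URL. A self-contained sketch of the same pattern against a bare AsyncOpenAI client (the URL below is a stand-in, not taken from this change):

    import asyncio

    from openai import APIConnectionError, AsyncOpenAI


    async def list_models_or_fail(base_url: str) -> list[str]:
        # Translate a connection failure into a ValueError that names the URL,
        # mirroring the adapter change above.
        client = AsyncOpenAI(base_url=base_url, api_key="fake")
        try:
            res = await client.models.list()
        except APIConnectionError as e:
            raise ValueError(
                f"Failed to connect to vLLM at {base_url}. Please check if vLLM is running and accessible at that URL."
            ) from e
        return [m.id async for m in res]


    if __name__ == "__main__":
        print(asyncio.run(list_models_or_fail("http://localhost:8000/v1")))
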
@@ -40,7 +40,7 @@ class WatsonXConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}",
-            "api_key": "${env.WATSONX_API_KEY:}",
-            "project_id": "${env.WATSONX_PROJECT_ID:}",
+            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
+            "api_key": "${env.WATSONX_API_KEY:+}",
+            "project_id": "${env.WATSONX_PROJECT_ID:+}",
         }

@@ -18,10 +18,16 @@ from llama_stack.apis.inference import (
     CompletionRequest,
     EmbeddingsResponse,
     EmbeddingTaskType,
+    GreedySamplingStrategy,
     Inference,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
     OpenAIEmbeddingsResponse,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -29,14 +35,6 @@ from llama_stack.apis.inference import (
     ToolConfig,
     ToolDefinition,
     ToolPromptFormat,
-)
-from llama_stack.apis.inference.inference import (
-    GreedySamplingStrategy,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )

@@ -55,10 +55,10 @@ class NvidiaPostTrainingConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "api_key": "${env.NVIDIA_API_KEY:}",
-            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:default}",
-            "project_id": "${env.NVIDIA_PROJECT_ID:test-project}",
-            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}",
+            "api_key": "${env.NVIDIA_API_KEY:+}",
+            "dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
+            "project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
+            "customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}",
         }

@@ -35,6 +35,6 @@ class NVIDIASafetyConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
-            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:self-check}",
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}",
+            "config_id": "${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}",
         }

@@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.BRAVE_SEARCH_API_KEY:}",
+            "api_key": "${env.BRAVE_SEARCH_API_KEY:+}",
             "max_results": 3,
         }

@@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
         return {
-            "api_key": "${env.TAVILY_SEARCH_API_KEY:}",
+            "api_key": "${env.TAVILY_SEARCH_API_KEY:+}",
             "max_results": 3,
         }

@@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel):
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {
-            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:}",
+            "api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}",
         }

@@ -22,8 +22,8 @@ class PGVectorVectorIOConfig(BaseModel):
     @classmethod
     def sample_run_config(
         cls,
-        host: str = "${env.PGVECTOR_HOST:localhost}",
-        port: int = "${env.PGVECTOR_PORT:5432}",
+        host: str = "${env.PGVECTOR_HOST:=localhost}",
+        port: int = "${env.PGVECTOR_PORT:=5432}",
         db: str = "${env.PGVECTOR_DB}",
         user: str = "${env.PGVECTOR_USER}",
         password: str = "${env.PGVECTOR_PASSWORD}",

@@ -70,8 +70,8 @@ class QdrantIndex(EmbeddingIndex):
         )

        points = []
-        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
-            chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
+        for _i, (chunk, embedding) in enumerate(zip(chunks, embeddings, strict=False)):
+            chunk_id = chunk.chunk_id
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),

@@ -23,6 +23,13 @@ from llama_stack.apis.inference import (
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAICompletion,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+    OpenAIMessageParam,
+    OpenAIResponseFormatParam,
     ResponseFormat,
     SamplingParams,
     TextTruncation,
@@ -31,16 +38,7 @@ from llama_stack.apis.inference import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAICompletion,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
-)
-from llama_stack.apis.models.models import Model
+from llama_stack.apis.models import Model
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -8,7 +8,7 @@ from typing import Any

 from pydantic import BaseModel, Field

-from llama_stack.apis.models.models import ModelType
+from llama_stack.apis.models import ModelType
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 from llama_stack.providers.utils.inference import (

@@ -95,27 +95,25 @@ from llama_stack.apis.inference import (
     CompletionResponse,
     CompletionResponseStreamChunk,
     GreedySamplingStrategy,
-    Message,
-    SamplingParams,
-    SystemMessage,
-    TokenLogProbs,
-    ToolChoice,
-    ToolResponseMessage,
-    TopKSamplingStrategy,
-    TopPSamplingStrategy,
-    UserMessage,
-)
-from llama_stack.apis.inference.inference import (
     JsonSchemaResponseFormat,
+    Message,
     OpenAIChatCompletion,
     OpenAICompletion,
     OpenAICompletionChoice,
     OpenAIEmbeddingData,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
+    SamplingParams,
+    SystemMessage,
+    TokenLogProbs,
+    ToolChoice,
     ToolConfig,
+    ToolResponseMessage,
+    TopKSamplingStrategy,
+    TopPSamplingStrategy,
+    UserMessage,
 )
-from llama_stack.apis.inference.inference import (
+from llama_stack.apis.inference import (
     OpenAIChoice as OpenAIChatCompletionChoice,
 )
 from llama_stack.models.llama.datatypes import (

@@ -1026,7 +1024,9 @@ def openai_messages_to_messages(
     return converted_messages


-def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam]):
+def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
+    if content is None:
+        return ""
     if isinstance(content, str):
         return content
     elif isinstance(content, list):

@@ -45,8 +45,8 @@ class RedisKVStoreConfig(CommonConfig):
         return {
             "type": "redis",
             "namespace": None,
-            "host": "${env.REDIS_HOST:localhost}",
-            "port": "${env.REDIS_PORT:6379}",
+            "host": "${env.REDIS_HOST:=localhost}",
+            "port": "${env.REDIS_PORT:=6379}",
         }

@@ -66,7 +66,7 @@ class SqliteKVStoreConfig(CommonConfig):
         return {
             "type": "sqlite",
             "namespace": None,
-            "db_path": "${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }

@@ -84,12 +84,12 @@ class PostgresKVStoreConfig(CommonConfig):
         return {
             "type": "postgres",
             "namespace": None,
-            "host": "${env.POSTGRES_HOST:localhost}",
-            "port": "${env.POSTGRES_PORT:5432}",
-            "db": "${env.POSTGRES_DB:llamastack}",
-            "user": "${env.POSTGRES_USER:llamastack}",
-            "password": "${env.POSTGRES_PASSWORD:llamastack}",
-            "table_name": "${env.POSTGRES_TABLE_NAME:" + table_name + "}",
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+            "table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
         }

     @classmethod
@@ -131,12 +131,12 @@ class MongoDBKVStoreConfig(CommonConfig):
         return {
             "type": "mongodb",
             "namespace": None,
-            "host": "${env.MONGODB_HOST:localhost}",
-            "port": "${env.MONGODB_PORT:5432}",
+            "host": "${env.MONGODB_HOST:=localhost}",
+            "port": "${env.MONGODB_PORT:=5432}",
             "db": "${env.MONGODB_DB}",
             "user": "${env.MONGODB_USER}",
             "password": "${env.MONGODB_PASSWORD}",
-            "collection_name": "${env.MONGODB_COLLECTION_NAME:" + collection_name + "}",
+            "collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
         }

@@ -12,8 +12,7 @@ import uuid
 from abc import ABC, abstractmethod
 from typing import Any

-from llama_stack.apis.files import Files
-from llama_stack.apis.files.files import OpenAIFileObject
+from llama_stack.apis.files import Files, OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,

@@ -7,6 +7,7 @@ import base64
 import io
 import logging
 import re
+import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any

@@ -23,12 +24,13 @@ from llama_stack.apis.common.content_types import (
 )
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import Api
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
+from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id

 log = logging.getLogger(__name__)

@@ -148,6 +150,7 @@ async def content_from_doc(doc: RAGDocument) -> str:
 def make_overlapped_chunks(
     document_id: str, text: str, window_len: int, overlap_len: int, metadata: dict[str, Any]
 ) -> list[Chunk]:
+    default_tokenizer = "DEFAULT_TIKTOKEN_TOKENIZER"
     tokenizer = Tokenizer.get_instance()
     tokens = tokenizer.encode(text, bos=False, eos=False)
     try:

@@ -161,16 +164,32 @@ def make_overlapped_chunks(
     for i in range(0, len(tokens), window_len - overlap_len):
         toks = tokens[i : i + window_len]
         chunk = tokenizer.decode(toks)
+        chunk_id = generate_chunk_id(chunk, text)
         chunk_metadata = metadata.copy()
+        chunk_metadata["chunk_id"] = chunk_id
         chunk_metadata["document_id"] = document_id
         chunk_metadata["token_count"] = len(toks)
         chunk_metadata["metadata_token_count"] = len(metadata_tokens)

+        backend_chunk_metadata = ChunkMetadata(
+            chunk_id=chunk_id,
+            document_id=document_id,
+            source=metadata.get("source", None),
+            created_timestamp=metadata.get("created_timestamp", int(time.time())),
+            updated_timestamp=int(time.time()),
+            chunk_window=f"{i}-{i + len(toks)}",
+            chunk_tokenizer=default_tokenizer,
+            chunk_embedding_model=None,  # This will be set in `VectorDBWithIndex.insert_chunks`
+            content_token_count=len(toks),
+            metadata_token_count=len(metadata_tokens),
+        )
+
         # chunk is a string
         chunks.append(
             Chunk(
                 content=chunk,
                 metadata=chunk_metadata,
+                chunk_metadata=backend_chunk_metadata,
             )
         )

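Note: the new ChunkMetadata record gives each chunk a stable identity and provenance alongside the free-form metadata dict. A sketch of the object make_overlapped_chunks now attaches to every chunk; the concrete values below are illustrative only:

    import time

    from llama_stack.apis.vector_io import Chunk, ChunkMetadata

    now = int(time.time())
    chunk = Chunk(
        content="The quick brown fox",
        metadata={"document_id": "doc-1", "chunk_id": "doc-1-chunk-0", "token_count": 4},
        chunk_metadata=ChunkMetadata(
            chunk_id="doc-1-chunk-0",
            document_id="doc-1",
            source=None,
            created_timestamp=now,
            updated_timestamp=now,
            chunk_window="0-4",
            chunk_tokenizer="DEFAULT_TIKTOKEN_TOKENIZER",
            chunk_embedding_model=None,  # back-filled later by VectorDBWithIndex.insert_chunks
            content_token_count=4,
            metadata_token_count=0,
        ),
    )
    print(chunk.chunk_metadata.chunk_id)
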
@@ -237,6 +256,9 @@ class VectorDBWithIndex:
         for i, c in enumerate(chunks):
             if c.embedding is None:
                 chunks_to_embed.append(c)
+                if c.chunk_metadata:
+                    c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model
+                    c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension
             else:
                 _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)

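Note: insert_chunks is where chunk_embedding_model and chunk_embedding_dimension actually get filled in. A small, dependency-free stub of that bookkeeping, using stand-in dataclasses rather than the real Chunk/ChunkMetadata types:

    from dataclasses import dataclass


    @dataclass
    class ChunkMetadataStub:
        chunk_embedding_model: str | None = None
        chunk_embedding_dimension: int | None = None


    @dataclass
    class ChunkStub:
        embedding: list[float] | None
        chunk_metadata: ChunkMetadataStub | None


    def tag_pending_chunks(chunks: list[ChunkStub], model: str, dimension: int) -> list[ChunkStub]:
        # Same idea as the diff: chunks without embeddings are queued for embedding,
        # and their metadata records which model and dimension will be used.
        to_embed = []
        for c in chunks:
            if c.embedding is None:
                to_embed.append(c)
                if c.chunk_metadata:
                    c.chunk_metadata.chunk_embedding_model = model
                    c.chunk_metadata.chunk_embedding_dimension = dimension
        return to_embed


    print(tag_pending_chunks([ChunkStub(None, ChunkMetadataStub())], "all-MiniLM-L6-v2", 384))
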
@@ -50,7 +50,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
         return cls(
             type="sqlite",
-            db_path="${env.SQLITE_STORE_DIR:" + __distro_dir__ + "}/" + db_name,
+            db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         )

     @property
@@ -78,11 +78,11 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def sample_run_config(cls, **kwargs):
         return cls(
             type="postgres",
-            host="${env.POSTGRES_HOST:localhost}",
-            port="${env.POSTGRES_PORT:5432}",
-            db="${env.POSTGRES_DB:llamastack}",
-            user="${env.POSTGRES_USER:llamastack}",
-            password="${env.POSTGRES_PASSWORD:llamastack}",
+            host="${env.POSTGRES_HOST:=localhost}",
+            port="${env.POSTGRES_PORT:=5432}",
+            db="${env.POSTGRES_DB:=llamastack}",
+            user="${env.POSTGRES_USER:=llamastack}",
+            password="${env.POSTGRES_PASSWORD:=llamastack}",
         )

Some files were not shown because too many files have changed in this diff.