Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)

Merge branch 'main' into fix-urllib3

Commit 9410d8f7a4: 23 changed files with 335 additions and 85 deletions
.github/workflows/integration-vector-io-tests.yml (new file, vendored, +142 lines)

@@ -0,0 +1,142 @@
name: Vector IO Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/vector_io/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-vector-io-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        vector-io-provider: ["inline::faiss", "inline::sqlite-vec", "remote::chromadb", "remote::pgvector"]
        python-version: ["3.12", "3.13"]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup Chroma
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          docker run --rm -d --pull always \
            --name chromadb \
            -p 8000:8000 \
            -v ~/chroma:/chroma/chroma \
            -e IS_PERSISTENT=TRUE \
            -e ANONYMIZED_TELEMETRY=FALSE \
            chromadb/chroma:latest

      - name: Start PGVector DB
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          docker run -d \
            --name pgvector \
            -e POSTGRES_USER=llamastack \
            -e POSTGRES_PASSWORD=llamastack \
            -e POSTGRES_DB=llamastack \
            -p 5432:5432 \
            pgvector/pgvector:pg17

      - name: Wait for PGVector to be ready
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          echo "Waiting for Postgres to be ready..."
          for i in {1..30}; do
            if docker exec pgvector pg_isready -U llamastack > /dev/null 2>&1; then
              echo "Postgres is ready!"
              break
            fi
            echo "Not ready yet... ($i)"
            sleep 1
          done

      - name: Enable pgvector extension
        if: matrix.vector-io-provider == 'remote::pgvector'
        run: |
          PGPASSWORD=llamastack psql -h localhost -U llamastack -d llamastack \
            -c "CREATE EXTENSION IF NOT EXISTS vector;"

      - name: Wait for ChromaDB to be ready
        if: matrix.vector-io-provider == 'remote::chromadb'
        run: |
          echo "Waiting for ChromaDB to be ready..."
          for i in {1..30}; do
            if curl -s http://localhost:8000/api/v2/heartbeat | grep -q "nanosecond heartbeat"; then
              echo "ChromaDB is ready!"
              exit 0
            fi
            sleep 2
          done
          echo "ChromaDB failed to start"
          docker logs chromadb
          exit 1

      - name: Build Llama Stack
        run: |
          uv run llama stack build --template starter --image-type venv

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Run Vector IO Integration Tests
        env:
          ENABLE_CHROMADB: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'true' || '' }}
          CHROMADB_URL: ${{ matrix.vector-io-provider == 'remote::chromadb' && 'http://localhost:8000' || '' }}
          ENABLE_PGVECTOR: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'true' || '' }}
          PGVECTOR_HOST: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'localhost' || '' }}
          PGVECTOR_PORT: ${{ matrix.vector-io-provider == 'remote::pgvector' && '5432' || '' }}
          PGVECTOR_DB: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_USER: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
          PGVECTOR_PASSWORD: ${{ matrix.vector-io-provider == 'remote::pgvector' && 'llamastack' || '' }}
        run: |
          uv run pytest -sv --stack-config="inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
            tests/integration/vector_io \
            --embedding-model all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Create sanitized provider name
        if: ${{ always() }}
        run: |
          echo "SANITIZED_PROVIDER=$(echo "${{ matrix.vector-io-provider }}" | tr ':' '_')" >> $GITHUB_ENV

      - name: Write ChromaDB logs to file
        if: ${{ always() && matrix.vector-io-provider == 'remote::chromadb' }}
        run: |
          docker logs chromadb > chromadb.log

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
          path: |
            *.log
          retention-days: 1
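A note on the two readiness steps in the workflow above: both are the same bounded poll with a fixed delay. A minimal Python sketch of that pattern, reusing the heartbeat URL and expected response from the Chroma step (the wait_for_service helper and its defaults are ours, not part of the workflow):

import time
import urllib.request


def wait_for_service(url: str, expect: str, attempts: int = 30, delay: float = 2.0) -> bool:
    """Poll `url` until the response body contains `expect`, or give up."""
    for i in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if expect in resp.read().decode("utf-8", errors="replace"):
                    return True
        except OSError:
            pass  # connection refused or timed out; the service is not up yet
        print(f"Not ready yet... ({i})")
        time.sleep(delay)
    return False


if __name__ == "__main__":
    ok = wait_for_service("http://localhost:8000/api/v2/heartbeat", "nanosecond heartbeat")
    raise SystemExit(0 if ok else 1)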
CHANGELOG.md (70 lines changed)

@@ -1,5 +1,28 @@
# Changelog

# v0.2.12
Published on: 2025-06-20T22:52:12Z

## Highlights
* Filter support in file search
* Support auth attributes in inference and response stores

---

# v0.2.11
Published on: 2025-06-17T20:26:26Z

## Highlights
* OpenAI-compatible vector store APIs
* Hybrid Search in Sqlite-vec
* File search tool in Responses API
* Pagination in inference and response stores
* Added `suffix` to completions API for fill-in-the-middle tasks

---

# v0.2.10.1
Published on: 2025-06-06T20:11:02Z


@@ -481,51 +504,4 @@ Published on: 2024-11-23T17:14:07Z

---

# v0.0.54
Published on: 2024-11-22T00:36:09Z

---

# v0.0.53
Published on: 2024-11-20T22:18:00Z

🚀 Initial Release Notes for Llama Stack!

### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision model support for Together, Fireworks, Meta-Reference, Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks

### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity

### Removed
- `llama stack configure` command

---
@@ -146,7 +146,9 @@ in the runtime configuration to help route to the correct provider.""",
 
 
 class Provider(BaseModel):
-    provider_id: str
+    # provider_id of None means that the provider is not enabled - this happens
+    # when the provider is enabled via a conditional environment variable
+    provider_id: str | None
     provider_type: str
     config: dict[str, Any]
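The comment above documents the new convention: provider_id may be None when its value came from a conditional env substitution that resolved to nothing. The diff does not show where an empty substitution result is normalized to None, so as a hedged illustration of the convention only (ProviderSketch and its validator are hypothetical, not the stack's loader):

from typing import Any

from pydantic import BaseModel, field_validator


class ProviderSketch(BaseModel):
    # None means "not enabled": the id template (e.g. ${env.ENABLE_SQLITE_VEC:+sqlite-vec})
    # resolved to nothing because the variable was unset.
    provider_id: str | None
    provider_type: str
    config: dict[str, Any]

    @field_validator("provider_id", mode="before")
    @classmethod
    def empty_id_means_disabled(cls, v: Any) -> Any:
        return None if v == "" else v


p = ProviderSketch(provider_id="", provider_type="inline::sqlite-vec", config={})
assert p.provider_id is None  # callers must skip this provider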
@@ -48,6 +48,9 @@ class ProviderImpl(Providers):
         ret = []
         for api, providers in safe_config.providers.items():
             for p in providers:
+                # Skip providers that are not enabled
+                if p.provider_id is None:
+                    continue
                 ret.append(
                     ProviderInfo(
                         api=api,
@@ -255,6 +255,10 @@ async def instantiate_providers(
     impls: dict[Api, Any] = {}
     inner_impls_by_provider_id: dict[str, dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis}
     for api_str, provider in sorted_providers:
+        # Skip providers that are not enabled
+        if provider.provider_id is None:
+            continue
+
         deps = {a: impls[a] for a in provider.spec.api_dependencies}
         for a in provider.spec.optional_api_dependencies:
            if a in impls:
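Both guards above apply the same rule: anything whose provider_id is None is skipped wherever providers are enumerated. A small self-contained sketch of that filter (the enabled_only helper is ours, not in the diff):

from typing import Any, Iterable


def enabled_only(providers: Iterable[Any]) -> list[Any]:
    """Keep only providers that are enabled, i.e. have a concrete provider_id."""
    return [p for p in providers if p.provider_id is not None]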
@@ -137,6 +137,9 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
 
     async def initialize(self) -> None:
         if isinstance(self.config, RemoteChromaVectorIOConfig):
+            if not self.config.url:
+                raise ValueError("URL is a required parameter for the remote Chroma provider's config")
+
             log.info(f"Connecting to Chroma server at: {self.config.url}")
             url = self.config.url.rstrip("/")
             parsed = urlparse(url)
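Because url becomes optional on the config (next hunk), the adapter now validates it at initialize() time instead of at parse time. A reduced sketch of that fail-fast pattern, with illustrative names rather than the adapter's own:

class RemoteConfigSketch:
    """Stand-in for a config whose url may be left unset by env substitution."""

    def __init__(self, url: str | None):
        self.url = url


def connect(config: RemoteConfigSketch) -> str:
    # Check the optional field before first use, as initialize() does above.
    if not config.url:
        raise ValueError("URL is a required parameter for the remote Chroma provider's config")
    return config.url.rstrip("/")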
@@ -10,7 +10,7 @@ from pydantic import BaseModel
 
 
 class ChromaVectorIOConfig(BaseModel):
-    url: str
+    url: str | None
 
     @classmethod
     def sample_run_config(cls, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
@@ -13,11 +13,11 @@ from llama_stack.schema_utils import json_schema_type
 
 @json_schema_type
 class PGVectorVectorIOConfig(BaseModel):
-    host: str = Field(default="localhost")
-    port: int = Field(default=5432)
-    db: str = Field(default="postgres")
-    user: str = Field(default="postgres")
-    password: str = Field(default="mysecretpassword")
+    host: str | None = Field(default="localhost")
+    port: int | None = Field(default=5432)
+    db: str | None = Field(default="postgres")
+    user: str | None = Field(default="postgres")
+    password: str | None = Field(default="mysecretpassword")
 
     @classmethod
     def sample_run_config(
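One subtlety of the `str | None` plus Field(default=...) combination above: the default still applies when the key is simply omitted; None is only stored when a null is passed explicitly (for example, from an unset env placeholder). A quick self-contained check (the Cfg model is illustrative):

from pydantic import BaseModel, Field


class Cfg(BaseModel):
    host: str | None = Field(default="localhost")


print(Cfg().host)           # "localhost": the default applies when the key is omitted
print(Cfg(host=None).host)  # None: an explicit null overrides the default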
@@ -15,7 +15,21 @@ from pydantic import BaseModel, TypeAdapter
 
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_io import (
+    Chunk,
+    QueryChunksResponse,
+    SearchRankingOptions,
+    VectorIO,
+    VectorStoreChunkingStrategy,
+    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponsePage,
+)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
@@ -222,3 +236,108 @@ class PGVectorVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
         self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
         return self.cache[vector_db_id]
+
+    async def openai_create_vector_store(
+        self,
+        name: str,
+        file_ids: list[str] | None = None,
+        expires_after: dict[str, Any] | None = None,
+        chunking_strategy: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+        embedding_model: str | None = None,
+        embedding_dimension: int | None = 384,
+        provider_id: str | None = None,
+        provider_vector_db_id: str | None = None,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_list_vector_stores(
+        self,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+    ) -> VectorStoreListResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_update_vector_store(
+        self,
+        vector_store_id: str,
+        name: str | None = None,
+        expires_after: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> VectorStoreObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_delete_vector_store(
+        self,
+        vector_store_id: str,
+    ) -> VectorStoreDeleteResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_search_vector_store(
+        self,
+        vector_store_id: str,
+        query: str | list[str],
+        filters: dict[str, Any] | None = None,
+        max_num_results: int | None = 10,
+        ranking_options: SearchRankingOptions | None = None,
+        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
+    ) -> VectorStoreSearchResponsePage:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_attach_file_to_vector_store(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+        chunking_strategy: VectorStoreChunkingStrategy | None = None,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_list_files_in_vector_store(
+        self,
+        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
+    ) -> VectorStoreListFilesResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_retrieve_vector_store_file_contents(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileContentsResponse:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_update_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        attributes: dict[str, Any] | None = None,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
+
+    async def openai_delete_vector_store_file(
+        self,
+        vector_store_id: str,
+        file_id: str,
+    ) -> VectorStoreFileObject:
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")
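All of these stubs exist because the adapter must still expose the full VectorIO surface even though PGVector does not implement the OpenAI-compatible vector store endpoints; raising NotImplementedError gives callers an immediate, provider-specific failure rather than an obscure one later. A reduced sketch of the pattern (class and method names here are illustrative, not the adapter's):

import asyncio


class VectorStoreAPISketch:
    async def openai_retrieve_vector_store(self, vector_store_id: str) -> None:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in PGVector")


async def main() -> None:
    api = VectorStoreAPISketch()
    try:
        await api.openai_retrieve_vector_store("vs_123")
    except NotImplementedError as exc:
        print(exc)  # the caller sees exactly which capability is missing


asyncio.run(main())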
@@ -24,7 +24,7 @@ providers:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:http://localhost:11434}
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
   eval:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -32,7 +32,7 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
   scoring:
   - provider_id: basic
     provider_type: inline::basic
@@ -40,7 +40,7 @@ providers:
   - provider_id: braintrust
     provider_type: inline::braintrust
     config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
+      openai_api_key: ${env.OPENAI_API_KEY:+}
   datasetio:
   - provider_id: localfs
     provider_type: inline::localfs
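The recurring change across these config hunks is the placeholder syntax: `${env.VAR:default}` becomes the bash-style `${env.VAR:=default}` (substitute a default when the variable is unset or empty), and `${env.VAR:}` becomes `${env.VAR:+value}` (produce a value only when the variable is set). The real resolver lives in llama-stack's config loading and is not shown in this diff; a simplified Python sketch of the assumed semantics:

import os
import re

# ${env.VAR:=default} -> value of VAR, or "default" when VAR is unset/empty
# ${env.VAR:+value}   -> "value" when VAR is set and non-empty, else ""
_PLACEHOLDER = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")


def resolve(template: str) -> str:
    def repl(match: re.Match) -> str:
        var, op, arg = match.groups()
        val = os.environ.get(var, "")
        if op == "=":
            return val if val else arg
        return arg if val else ""

    return _PLACEHOLDER.sub(repl, template)


os.environ.pop("OLLAMA_URL", None)
print(resolve("${env.OLLAMA_URL:=http://localhost:11434}"))  # http://localhost:11434

os.environ["ENABLE_SQLITE_VEC"] = "1"
print(resolve("${env.ENABLE_SQLITE_VEC:+sqlite-vec}"))  # "sqlite-vec" (used later in this diff)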
@@ -48,14 +48,14 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
   - provider_id: huggingface
     provider_type: remote::huggingface
     config:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/huggingface}/huggingface_datasetio.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/huggingface}/huggingface_datasetio.db
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -74,7 +74,7 @@ providers:
       persistence_store:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/agents_store.db
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -86,19 +86,19 @@ providers:
       kvstore:
         type: sqlite
         namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/faiss_store.db
   tool_runtime:
   - provider_id: brave-search
     provider_type: remote::brave-search
     config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      api_key: ${env.BRAVE_SEARCH_API_KEY:+}
       max_results: 3
 
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/registry.db
 models: []
 shields: []
 vector_dbs: []
@@ -46,7 +46,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::meta-reference",
         config=MetaReferenceInferenceConfig.sample_run_config(
             model="${env.INFERENCE_MODEL}",
-            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
+            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:=null}",
         ),
     )
     embedding_provider = Provider(
@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_type="inline::meta-reference",
             config=MetaReferenceInferenceConfig.sample_run_config(
                 model="${env.SAFETY_MODEL}",
-                checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:null}",
+                checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:=null}",
             ),
         ),
     ],
@@ -16,7 +16,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.INFERENCE_MODEL}
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -29,7 +29,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.SAFETY_MODEL}
-      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -16,7 +16,7 @@ providers:
     provider_type: inline::meta-reference
     config:
       model: ${env.INFERENCE_MODEL}
-      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
       quantization:
         type: ${env.QUANTIZATION_TYPE:=bf16}
       model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
@@ -46,7 +46,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
+        OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"),
     ),
     (
         "anthropic",
@@ -56,7 +56,7 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"),
+        AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"),
     ),
     (
         "gemini",
@@ -66,17 +66,17 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
                 model_type=ModelType.llm,
             )
         ],
-        GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
+        GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"),
     ),
     (
         "groq",
         [],
-        GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
+        GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"),
     ),
     (
         "together",
         [],
-        TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
+        TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"),
     ),
 ]
 inference_providers = []
@@ -15,20 +15,20 @@ providers:
   - provider_id: openai
     provider_type: remote::openai
     config:
-      api_key: ${env.OPENAI_API_KEY:}
+      api_key: ${env.OPENAI_API_KEY:+}
   - provider_id: anthropic
     provider_type: remote::anthropic
     config:
-      api_key: ${env.ANTHROPIC_API_KEY:}
+      api_key: ${env.ANTHROPIC_API_KEY:+}
   - provider_id: gemini
     provider_type: remote::gemini
     config:
-      api_key: ${env.GEMINI_API_KEY:}
+      api_key: ${env.GEMINI_API_KEY:+}
   - provider_id: groq
     provider_type: remote::groq
     config:
       url: https://api.groq.com
-      api_key: ${env.GROQ_API_KEY:}
+      api_key: ${env.GROQ_API_KEY:+}
   - provider_id: together
     provider_type: remote::together
     config:
@@ -29,7 +29,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="vllm-inference",
             provider_type="remote::vllm",
             config=VLLMInferenceAdapterConfig.sample_run_config(
-                url="${env.VLLM_URL:http://localhost:8000/v1}",
+                url="${env.VLLM_URL:=http://localhost:8000/v1}",
             ),
         ),
     ]
@@ -12,7 +12,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:http://localhost:8000/v1}
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -44,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="vllm-inference",
         provider_type="remote::vllm",
         config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL:http://localhost:8000/v1}",
+            url="${env.VLLM_URL:=http://localhost:8000/v1}",
         ),
     )
     embedding_provider = Provider(
@@ -68,7 +68,7 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
-  - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec}
+  - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
     provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
@@ -175,7 +175,7 @@ def get_distribution_template() -> DistributionTemplate:
             config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
-            provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}",
+            provider_id="${env.ENABLE_SQLITE_VEC:+sqlite-vec}",
             provider_type="inline::sqlite-vec",
             config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
@@ -29,6 +29,7 @@ mapfile -t py_dirs < <(
     -type f \
     -name "*.py" ! -name "__init__.py" \
     ! -path "*/.venv/*" \
+    ! -path "*/node_modules/*" \
     -exec dirname {} \; | sort -u
 )
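For readers who prefer Python, a rough equivalent of the updated find invocation above (the py_dirs helper is ours, not part of the script): it collects the unique directories containing non-__init__ Python files, now skipping node_modules trees as well as .venv.

from pathlib import Path


def py_dirs(root: str = ".") -> list[str]:
    """Unique directories holding *.py files (excluding __init__.py), sorted."""
    skip = {".venv", "node_modules"}
    dirs = {
        str(p.parent)
        for p in Path(root).rglob("*.py")
        if p.name != "__init__.py" and not (skip & set(p.parts))
    }
    return sorted(dirs)


print("\n".join(py_dirs()))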