Merge-related changes.

2026-01-03 01:52:16 +00:00 · 2025-04-02 19:56:44 +02:00 · 2025-04-02 19:56:44 +02:00 · 60e9f46856
commit 60e9f46856
parent d38aea33c1 66d6c2580e
456 changed files with 38636 additions and 10892 deletions
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@ -10,7 +10,11 @@ from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -90,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
        },
--- a/llama_stack/templates/bedrock/doc_template.md
+++ b/llama_stack/templates/bedrock/doc_template.md
@ -47,9 +47,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@ -40,20 +40,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/bedrock/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@ -15,7 +15,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -113,7 +117,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "CEREBRAS_API_KEY": (
--- a/llama_stack/templates/cerebras/doc_template.md
+++ b/llama_stack/templates/cerebras/doc_template.md
@ -39,9 +39,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
@ -55,6 +56,6 @@ docker run \
 ```bash
 llama stack build --template cerebras --image-type conda
 llama stack run ./run.yaml \
-  --port 5001 \
+  --port 8321 \
  --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY
 ```
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@ -24,7 +24,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
@ -44,14 +45,26 @@ providers:
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -67,7 +80,7 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
  tool_runtime:
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@ -16,10 +16,16 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
+from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
+    SQLiteVectorIOConfig,
+)
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -117,7 +123,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "FIREWORKS_API_KEY": (
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@ -29,7 +29,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -42,20 +43,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ci-tests/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/dell/doc_template.md
+++ b/llama_stack/templates/dell/doc_template.md
@ -43,6 +43,7 @@ export CUDA_VISIBLE_DEVICES=0
 export LLAMA_STACK_PORT=8321

 docker run --rm -it \
+  --pull always \
  --network host \
  -v $HOME/.cache/huggingface:/data \
  -e HF_TOKEN=$HF_TOKEN \
@ -66,6 +67,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
  --network host \
  -v $HOME/.cache/huggingface:/data \
  -e HF_TOKEN=$HF_TOKEN \
@ -108,6 +110,7 @@ This method allows you to get started quickly without having to build the distri

 ```bash
 docker run -it \
+  --pull always \
  --network host \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
@ -135,6 +138,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v $HOME/.llama:/root/.llama \
  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@ -28,7 +28,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -41,20 +42,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/dependencies.json
+++ b/llama_stack/templates/dependencies.json
@ -0,0 +1,767 @@
+{
+  "bedrock": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "boto3",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "cerebras": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "cerebras_cloud_sdk",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "ci-tests": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fastapi",
+    "fire",
+    "fireworks-ai",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "dell": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "langdetect",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "dev": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fastapi",
+    "fire",
+    "fireworks-ai",
+    "httpx",
+    "langdetect",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "fireworks": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "fireworks-ai",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "groq": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "litellm",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "hf-endpoint": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "hf-serverless": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "meta-reference-gpu": [
+    "accelerate",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fairscale",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "lm-format-enforcer",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentence-transformers",
+    "sentencepiece",
+    "torch",
+    "torchvision",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "zmq"
+  ],
+  "meta-reference-quantized-gpu": [
+    "accelerate",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fairscale",
+    "faiss-cpu",
+    "fastapi",
+    "fbgemm-gpu",
+    "fire",
+    "httpx",
+    "langdetect",
+    "lm-format-enforcer",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentence-transformers",
+    "sentencepiece",
+    "torch",
+    "torchao==0.5.0",
+    "torchvision",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "zmq"
+  ],
+  "nvidia": [
+    "aiohttp",
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "ollama": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "ollama",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "open-benchmark": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "together",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
+  ],
+  "passthrough": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "remote-vllm": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn"
+  ],
+  "tgi": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "together": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "together",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "vllm-gpu": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "vllm",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ]
+}
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@ -17,20 +17,38 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
+from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
+    SQLiteVectorIOConfig,
+)
 from llama_stack.providers.remote.inference.anthropic.config import AnthropicConfig
-from llama_stack.providers.remote.inference.anthropic.models import MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES
+from llama_stack.providers.remote.inference.anthropic.models import (
+    MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES
+from llama_stack.providers.remote.inference.fireworks.models import (
+    MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.inference.gemini.config import GeminiConfig
-from llama_stack.providers.remote.inference.gemini.models import MODEL_ENTRIES as GEMINI_MODEL_ENTRIES
+from llama_stack.providers.remote.inference.gemini.models import (
+    MODEL_ENTRIES as GEMINI_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
-from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
+from llama_stack.providers.remote.inference.groq.models import (
+    MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
-from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
+from llama_stack.providers.remote.inference.openai.models import (
+    MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
+)
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
-from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.providers.remote.vector_io.pgvector.config import (
+    PGVectorVectorIOConfig,
+)
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@ -181,7 +199,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "FIREWORKS_API_KEY": (
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@ -58,7 +58,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -71,20 +72,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/experimental-post-training/run.yaml
+++ b/llama_stack/templates/experimental-post-training/run.yaml
@ -28,7 +28,11 @@ providers:
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -40,7 +44,11 @@ providers:
  datasetio:
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/localfs_datasetio.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -58,7 +66,7 @@ providers:
      persistence_store:
        type: sqlite
        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/agents_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@ -70,7 +78,7 @@ providers:
      kvstore:
        type: sqlite
        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/faiss_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -82,7 +90,7 @@ providers:
 metadata_store:
  namespace: null
  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/experimental-post-training}/registry.db
 models: []
 shields: []
 vector_dbs: []
--- a/llama_stack/templates/fireworks/doc_template.md
+++ b/llama_stack/templates/fireworks/doc_template.md
@ -49,9 +49,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@ -20,7 +20,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -172,7 +176,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "FIREWORKS_API_KEY": (
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@ -51,20 +51,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -89,7 +101,8 @@ providers:
      max_results: 3
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -83,7 +96,8 @@ providers:
      max_results: 3
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
--- a/llama_stack/templates/groq/doc_template.md
+++ b/llama_stack/templates/groq/doc_template.md
@ -49,9 +49,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@ -8,17 +8,17 @@ from pathlib import Path

 from llama_stack.apis.models.models import ModelType
 from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
-from llama_stack.distribution.datatypes import (
-    ModelInput,
-    Provider,
-    ToolGroupInput,
-)
+from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.providers.inline.inference.sentence_transformers import (
    SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -110,7 +110,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMASTACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "GROQ_API_KEY": (
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/groq/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@ -141,7 +141,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "HF_API_TOKEN": (
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@ -37,7 +37,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -50,20 +51,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@ -142,7 +142,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "HF_API_TOKEN": (
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@ -37,7 +37,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -50,20 +51,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@ -65,9 +65,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
@ -80,6 +81,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
@ -95,7 +97,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
 ```bash
 llama stack build --template {{ name }} --image-type conda
 llama stack run distributions/{{ name }}/run.yaml \
-  --port 5001 \
+  --port 8321 \
  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 ```

@ -103,7 +105,7 @@ If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
-  --port 5001 \
+  --port 8321 \
  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 ```
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@ -148,7 +148,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "INFERENCE_MODEL": (
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@ -39,7 +39,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -52,20 +53,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@ -33,7 +33,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -46,20 +47,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@ -67,9 +67,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@ -113,7 +113,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "INFERENCE_MODEL": (
--- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@ -35,7 +35,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -48,20 +49,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-quantized-gpu/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@ -1,32 +1,27 @@
 version: '2'
 distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference
+  description: Use NVIDIA NIM for running LLM inference and safety
  providers:
    inference:
    - remote::nvidia
    vector_io:
    - inline::faiss
    safety:
-    - inline::llama-guard
+    - remote::nvidia
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
    eval:
    - inline::meta-reference
+    post_training:
+    - remote::nvidia
    datasetio:
-    - remote::huggingface
    - inline::localfs
    scoring:
    - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
    tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
-    - remote::model-context-protocol
    preprocessing:
    - inline::basic
    - inline::simple_chunking
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@ -39,9 +39,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
@ -55,7 +56,7 @@ docker run \
 ```bash
 llama stack build --template nvidia --image-type conda
 llama stack run ./run.yaml \
-  --port 5001 \
+  --port 8321 \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
  --env INFERENCE_MODEL=$INFERENCE_MODEL
 ```
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@ -7,9 +7,10 @@
 from pathlib import Path

 from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
-from llama_stack.distribution.datatypes import Provider, ToolGroupInput
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


@ -17,19 +18,14 @@ def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::nvidia"],
        "vector_io": ["inline::faiss"],
-        "safety": ["inline::llama-guard"],
+        "safety": ["remote::nvidia"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
+        "post_training": ["remote::nvidia"],
+        "datasetio": ["inline::localfs"],
+        "scoring": ["inline::basic"],
+        "tool_runtime": ["inline::rag-runtime"],
        "preprocessing": ["inline::basic", "inline::simple_chunking"],
    }

@ -38,23 +34,28 @@ def get_distribution_template() -> DistributionTemplate:
        provider_type="remote::nvidia",
        config=NVIDIAConfig.sample_run_config(),
    )
+    safety_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NVIDIASafetyConfig.sample_run_config(),
+    )
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="nvidia",
+    )
+    safety_model = ModelInput(
+        model_id="${env.SAFETY_MODEL}",
+        provider_id="nvidia",
+    )

    available_models = {
        "nvidia": MODEL_ENTRIES,
    }
    default_tool_groups = [
-        ToolGroupInput(
-            toolgroup_id="builtin::websearch",
-            provider_id="tavily-search",
-        ),
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]
    default_preprocessors = [
        PreprocessorInput(
@ -71,7 +72,7 @@ def get_distribution_template() -> DistributionTemplate:
    return DistributionTemplate(
        name="nvidia",
        distro_type="remote_hosted",
-        description="Use NVIDIA NIM for running LLM inference",
+        description="Use NVIDIA NIM for running LLM inference and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
@ -85,15 +86,60 @@ def get_distribution_template() -> DistributionTemplate:
                default_tool_groups=default_tool_groups,
                default_preprocessors=default_preprocessors,
            ),
+            "run-with-safety.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [
+                        inference_provider,
+                        safety_provider,
+                    ]
+                },
+                default_models=[inference_model, safety_model],
+                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
+                default_tool_groups=default_tool_groups,
+                default_preprocessors=default_preprocessors,
+            ),
        },
        run_config_env_vars={
-            "LLAMASTACK_PORT": (
-                "5001",
-                "Port for the Llama Stack distribution server",
-            ),
            "NVIDIA_API_KEY": (
                "",
                "NVIDIA API Key",
            ),
+            ## Nemo Customizer related variables
+            "NVIDIA_USER_ID": (
+                "llama-stack-user",
+                "NVIDIA User ID",
+            ),
+            "NVIDIA_DATASET_NAMESPACE": (
+                "default",
+                "NVIDIA Dataset Namespace",
+            ),
+            "NVIDIA_ACCESS_POLICIES": (
+                "{}",
+                "NVIDIA Access Policies",
+            ),
+            "NVIDIA_PROJECT_ID": (
+                "test-project",
+                "NVIDIA Project ID",
+            ),
+            "NVIDIA_CUSTOMIZER_URL": (
+                "https://customizer.api.nvidia.com",
+                "NVIDIA Customizer URL",
+            ),
+            "NVIDIA_OUTPUT_MODEL_DIR": (
+                "test-example-model@v1",
+                "NVIDIA Output Model Directory",
+            ),
+            "GUARDRAILS_SERVICE_URL": (
+                "http://0.0.0.0:7331",
+                "URL for the NeMo Guardrails Service",
+            ),
+            "INFERENCE_MODEL": (
+                "Llama3.1-8B-Instruct",
+                "Inference model",
+            ),
+            "SAFETY_MODEL": (
+                "meta/llama-3.1-8b-instruct",
+                "Name of the model to use for safety",
+            ),
        },
    )
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@ -0,0 +1,123 @@
+version: '2'
+image_name: nvidia
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- post_training
+- preprocessing
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:}
+  # NOTE(review): this second entry reuses provider_id `nvidia` and carries the
+  # same guardrails config (guardrails_service_url / config_id) as the entry
+  # under `safety` below. It looks like a safety provider listed under
+  # `inference` -- confirm this is intended and that duplicate provider ids
+  # within one API are accepted.
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
+  safety:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default}
+      project_id: ${env.NVIDIA_PROJECT_ID:test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}
+  datasetio:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  preprocessing:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: simple_chunking
+    provider_type: inline::simple_chunking
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: nvidia
+  model_type: llm
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+  model_type: llm
+shields:
+- shield_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+preprocessors:
+- preprocessor_id: builtin::basic
+  provider_id: basic
+- preprocessor_id: builtin::chunking
+  provider_id: simple_chunking
+server:
+  port: 8321
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@ -5,6 +5,7 @@ apis:
 - datasetio
 - eval
 - inference
+- post_training
 - preprocessing
 - safety
 - scoring
@ -27,9 +28,11 @@ providers:
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
  safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
-    config: {}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -42,51 +45,41 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default}
+      project_id: ${env.NVIDIA_PROJECT_ID:test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}
  datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config: {}
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
    config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
  tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
@ -222,12 +215,8 @@ datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 preprocessors:
 - preprocessor_id: builtin::basic
  provider_id: basic
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@ -60,9 +60,10 @@ Now you are ready to run Llama Stack with Ollama as the inference provider. You
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-export LLAMA_STACK_PORT=5001
+export LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
@ -80,6 +81,7 @@ cd /path/to/llama-stack

 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
@ -96,7 +98,7 @@ docker run \
 Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.

 ```bash
-export LLAMA_STACK_PORT=5001
+export LLAMA_STACK_PORT=8321

 llama stack build --template {{ name }} --image-type conda
 llama stack run ./run.yaml \
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@ -152,7 +152,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "OLLAMA_URL": (
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@ -44,20 +44,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -91,7 +103,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@ -28,7 +28,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -41,20 +42,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -88,7 +101,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/open-benchmark/init.py
+++ b/llama_stack/templates/open-benchmark/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .open_benchmark import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@ -0,0 +1,306 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Dict, List, Tuple
+
+from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    BenchmarkInput,
+    DatasetInput,
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
+    SQLiteVectorIOConfig,
+)
+from llama_stack.providers.remote.inference.anthropic.config import AnthropicConfig
+from llama_stack.providers.remote.inference.gemini.config import GeminiConfig
+from llama_stack.providers.remote.inference.groq.config import GroqConfig
+from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
+from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
+from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
+from llama_stack.providers.remote.vector_io.pgvector.config import (
+    PGVectorVectorIOConfig,
+)
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)
+
+
+def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
+    # Build the remote inference providers offered by this template.
+    #
+    # Returns a pair:
+    #   - a list of Provider objects, one per tuple in `providers` below, in the
+    #     same order
+    #   - a dict mapping each provider_id to the ProviderModelEntry list that is
+    #     declared for it here (possibly empty)
+    #
+    # in this template, we allow each API key to be optional
+    # (every api_key placeholder below is written with an empty default after the colon)
+    #
+    # Each tuple is (provider_id, model entries, provider config).
+    providers = [
+        (
+            "openai",
+            [
+                ProviderModelEntry(
+                    provider_model_id="openai/gpt-4o",
+                    model_type=ModelType.llm,
+                )
+            ],
+            OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:}"),
+        ),
+        (
+            "anthropic",
+            [
+                ProviderModelEntry(
+                    provider_model_id="anthropic/claude-3-5-sonnet-latest",
+                    model_type=ModelType.llm,
+                )
+            ],
+            AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:}"),
+        ),
+        (
+            "gemini",
+            [
+                ProviderModelEntry(
+                    provider_model_id="gemini/gemini-1.5-flash",
+                    model_type=ModelType.llm,
+                )
+            ],
+            GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:}"),
+        ),
+        # groq and together declare no model entries here; their models are
+        # added as explicit ModelInput items in get_distribution_template().
+        (
+            "groq",
+            [],
+            GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:}"),
+        ),
+        (
+            "together",
+            [],
+            TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:}"),
+        ),
+    ]
+    inference_providers = []
+    available_models = {}
+    for provider_id, model_entries, config in providers:
+        # The provider type is derived from the id as "remote::<provider_id>".
+        inference_providers.append(
+            Provider(
+                provider_id=provider_id,
+                provider_type=f"remote::{provider_id}",
+                config=config,
+            )
+        )
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
+
+
+def get_distribution_template() -> DistributionTemplate:
+    # Assemble the "open-benchmark" DistributionTemplate: provider types per API,
+    # provider overrides for inference and vector_io, default models, tool groups,
+    # a shield, evaluation datasets and benchmarks, and the environment variables
+    # reported for the single "run.yaml" run config.
+    inference_providers, available_models = get_inference_providers()
+    # API name -> provider types included in this distribution.
+    providers = {
+        "inference": [p.provider_type for p in inference_providers],
+        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }
+    # Also used below to build the sqlite-vec storage directory path.
+    name = "open-benchmark"
+
+    # NOTE(review): the "${env.VAR+value}" provider ids presumably resolve to the
+    # given id only when VAR is set, making chromadb/pgvector opt-in -- confirm
+    # against the env-substitution implementation.
+    vector_io_providers = [
+        Provider(
+            provider_id="sqlite-vec",
+            provider_type="inline::sqlite-vec",
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+        ),
+        Provider(
+            provider_id="${env.ENABLE_CHROMADB+chromadb}",
+            provider_type="remote::chromadb",
+            config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
+        ),
+        Provider(
+            provider_id="${env.ENABLE_PGVECTOR+pgvector}",
+            provider_type="remote::pgvector",
+            config=PGVectorVectorIOConfig.sample_run_config(
+                db="${env.PGVECTOR_DB:}",
+                user="${env.PGVECTOR_USER:}",
+                password="${env.PGVECTOR_PASSWORD:}",
+            ),
+        ),
+    ]
+
+    # Built-in tool groups, each bound to the provider id that serves it.
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
+
+    # Models from the per-provider entries, followed by explicit models for the
+    # groq and together providers (which declare no entries of their own).
+    default_models = get_model_registry(available_models) + [
+        ModelInput(
+            model_id="meta-llama/Llama-3.3-70B-Instruct",
+            provider_id="groq",
+            provider_model_id="groq/llama-3.3-70b-versatile",
+            model_type=ModelType.llm,
+        ),
+        ModelInput(
+            model_id="meta-llama/Llama-3.1-405B-Instruct",
+            provider_id="together",
+            provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+            model_type=ModelType.llm,
+        ),
+    ]
+
+    # Evaluation datasets. Every entry uses the eval_messages_answer purpose and a
+    # "huggingface://" URI whose query string carries the split (and, for some, a
+    # config name).
+    default_datasets = [
+        DatasetInput(
+            dataset_id="simpleqa",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/simpleqa?split=train",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="mmlu_cot",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="gpqa_cot",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="math_500",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/math_500?split=test",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="bfcl",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="ifeval",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/IfEval?split=train",
+            ),
+        ),
+        DatasetInput(
+            dataset_id="docvqa",
+            purpose=DatasetPurpose.eval_messages_answer,
+            source=URIDataSource(
+                uri="huggingface://datasets/llamastack/docvqa?split=val",
+            ),
+        ),
+    ]
+
+    # One benchmark per dataset above; each dataset_id refers to an entry in
+    # default_datasets.
+    default_benchmarks = [
+        BenchmarkInput(
+            benchmark_id="meta-reference-simpleqa",
+            dataset_id="simpleqa",
+            scoring_functions=["llm-as-judge::405b-simpleqa"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-mmlu-cot",
+            dataset_id="mmlu_cot",
+            scoring_functions=["basic::regex_parser_multiple_choice_answer"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-gpqa-cot",
+            dataset_id="gpqa_cot",
+            scoring_functions=["basic::regex_parser_multiple_choice_answer"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-math-500",
+            dataset_id="math_500",
+            scoring_functions=["basic::regex_parser_math_response"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-bfcl",
+            dataset_id="bfcl",
+            scoring_functions=["basic::bfcl"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-ifeval",
+            dataset_id="ifeval",
+            scoring_functions=["basic::ifeval"],
+        ),
+        BenchmarkInput(
+            benchmark_id="meta-reference-docvqa",
+            dataset_id="docvqa",
+            scoring_functions=["basic::docvqa"],
+        ),
+    ]
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="Distribution for running open benchmarks",
+        container_image=None,
+        template_path=None,
+        providers=providers,
+        available_models_by_provider=available_models,
+        run_configs={
+            # Only inference and vector_io get explicit provider overrides here.
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": inference_providers,
+                    "vector_io": vector_io_providers,
+                },
+                default_models=default_models,
+                default_tool_groups=default_tool_groups,
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_datasets=default_datasets,
+                default_benchmarks=default_benchmarks,
+            ),
+        },
+        # Environment variable name -> (default value, description).
+        # NOTE(review): the variables referenced by the vector_io placeholders
+        # above (ENABLE_CHROMADB, CHROMADB_URL, ENABLE_PGVECTOR, PGVECTOR_*) are
+        # not listed here -- confirm whether they should be.
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "8321",
+                "Port for the Llama Stack distribution server",
+            ),
+            "TOGETHER_API_KEY": (
+                "",
+                "Together API Key",
+            ),
+            "OPENAI_API_KEY": (
+                "",
+                "OpenAI API Key",
+            ),
+            "GEMINI_API_KEY": (
+                "",
+                "Gemini API Key",
+            ),
+            "ANTHROPIC_API_KEY": (
+                "",
+                "Anthropic API Key",
+            ),
+            "GROQ_API_KEY": (
+                "",
+                "Groq API Key",
+            ),
+        },
+    )
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@ -38,7 +38,7 @@ providers:
  - provider_id: sqlite-vec
    provider_type: inline::sqlite-vec
    config:
-      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/sqlite_vec.db
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/sqlite_vec.db
  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
    provider_type: remote::chromadb
    config:
@ -54,7 +54,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -62,25 +63,37 @@ providers:
      persistence_store:
        type: sqlite
        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/agents_store.db
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -114,18 +127,13 @@ providers:
    config: {}
 metadata_store:
  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/registry.db
 models:
 - metadata: {}
  model_id: openai/gpt-4o
  provider_id: openai
  provider_model_id: openai/gpt-4o
  model_type: llm
- metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: together
-  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
-  model_type: llm
 - metadata: {}
  model_id: anthropic/claude-3-5-sonnet-latest
  provider_id: anthropic
@ -141,84 +149,94 @@ models:
  provider_id: groq
  provider_model_id: groq/llama-3.3-70b-versatile
  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-405B-Instruct
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
 vector_dbs: []
 datasets:
-  - dataset_id: simpleqa
-    provider_id: huggingface
-    url:
-      uri: https://huggingface.co/datasets/llamastack/simpleqa
-    metadata:
-      path: llamastack/simpleqa
-      name:
-      split: train
-    dataset_schema:
-      input_query:
-        type: string
-      expected_answer:
-        type: string
-      chat_completion_input:
-        type: string
-  - dataset_id: mmlu_cot
-    provider_id: huggingface
-    url:
-      uri: https://huggingface.co/datasets/llamastack/mmlu_cot
-    metadata:
-      path: llamastack/mmlu_cot
-      name: all
-      split: test
-    dataset_schema:
-      input_query:
-        type: string
-      expected_answer:
-        type: string
-      chat_completion_input:
-        type: string
-  - dataset_id: gpqa_cot
-    provider_id: huggingface
-    url:
-      uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
-    metadata:
-      path: llamastack/gpqa_0shot_cot
-      name: gpqa_main
-      split: train
-    dataset_schema:
-      input_query:
-        type: string
-      expected_answer:
-        type: string
-      chat_completion_input:
-        type: string
-  - dataset_id: math_500
-    provider_id: huggingface
-    url:
-      uri: https://huggingface.co/datasets/llamastack/math_500
-    metadata:
-      path: llamastack/math_500
-      name:
-      split: test
-    dataset_schema:
-      input_query:
-        type: string
-      expected_answer:
-        type: string
-      chat_completion_input:
-        type: string
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/simpleqa?split=train
+  metadata: {}
+  dataset_id: simpleqa
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
+  metadata: {}
+  dataset_id: mmlu_cot
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
+  metadata: {}
+  dataset_id: gpqa_cot
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/math_500?split=test
+  metadata: {}
+  dataset_id: math_500
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/bfcl_v3?split=train
+  metadata: {}
+  dataset_id: bfcl
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/IfEval?split=train
+  metadata: {}
+  dataset_id: ifeval
+- purpose: eval/messages-answer
+  source:
+    type: uri
+    uri: huggingface://datasets/llamastack/docvqa?split=val
+  metadata: {}
+  dataset_id: docvqa
 scoring_fns: []
 benchmarks:
-  - benchmark_id: meta-reference-simpleqa
-    dataset_id: simpleqa
-    scoring_functions: ["llm-as-judge::405b-simpleqa"]
-  - benchmark_id: meta-reference-mmlu-cot
-    dataset_id: mmlu_cot
-    scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-  - benchmark_id: meta-reference-gpqa-cot
-    dataset_id: gpqa_cot
-    scoring_functions: ["basic::regex_parser_multiple_choice_answer"]
-  - benchmark_id: meta-reference-math-500
-    dataset_id: math_500
-    scoring_functions: ["basic::regex_parser_math_response"]
+- dataset_id: simpleqa
+  scoring_functions:
+  - llm-as-judge::405b-simpleqa
+  metadata: {}
+  benchmark_id: meta-reference-simpleqa
+- dataset_id: mmlu_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-mmlu-cot
+- dataset_id: gpqa_cot
+  scoring_functions:
+  - basic::regex_parser_multiple_choice_answer
+  metadata: {}
+  benchmark_id: meta-reference-gpqa-cot
+- dataset_id: math_500
+  scoring_functions:
+  - basic::regex_parser_math_response
+  metadata: {}
+  benchmark_id: meta-reference-math-500
+- dataset_id: bfcl
+  scoring_functions:
+  - basic::bfcl
+  metadata: {}
+  benchmark_id: meta-reference-bfcl
+- dataset_id: ifeval
+  scoring_functions:
+  - basic::ifeval
+  metadata: {}
+  benchmark_id: meta-reference-ifeval
+- dataset_id: docvqa
+  scoring_functions:
+  - basic::docvqa
+  metadata: {}
+  benchmark_id: meta-reference-docvqa
 tool_groups:
 - toolgroup_id: builtin::websearch
  provider_id: tavily-search
@ -226,5 +244,6 @@ tool_groups:
  provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
+preprocessors: []
 server:
  port: 8321
--- a/llama_stack/templates/passthrough/init.py
+++ b/llama_stack/templates/passthrough/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .passthrough import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/passthrough/build.yaml
+++ b/llama_stack/templates/passthrough/build.yaml
@ -1,9 +1,10 @@
 version: '2'
 distribution_spec:
-  description: Use for running LLM inference with the endpoint that compatible with Llama Stack API
+  description: Use Passthrough hosted llama-stack endpoint for LLM inference
  providers:
    inference:
    - remote::passthrough
+    - inline::sentence-transformers
    vector_io:
    - inline::faiss
    - remote::chromadb
@ -26,10 +27,8 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
+    - remote::wolfram-alpha
    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
-    preprocessing:
-    - inline::basic
-    - inline::simple_chunking
 image_type: conda
--- a/llama_stack/templates/passthrough/doc_template.md
+++ b/llama_stack/templates/passthrough/doc_template.md
@ -0,0 +1,35 @@
+---
+orphan: true
+---
+# Passthrough Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+- `{{ model.model_id }} {{ model.doc_string }}`
+{% endfor %}
+{% endif %}
--- a/llama_stack/templates/passthrough/passthrough.py
+++ b/llama_stack/templates/passthrough/passthrough.py
@ -0,0 +1,198 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.remote.inference.passthrough.config import (
+    PassthroughImplConfig,
+)
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:  # Build the DistributionTemplate for the "passthrough" distribution.
+    providers = {  # API name -> provider types listed for this distribution
+        "inference": ["remote::passthrough", "inline::sentence-transformers"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "remote::wolfram-alpha",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }
+
+    name = "passthrough"  # reused for the template name and the faiss sample-config path below
+
+    inference_provider = Provider(  # remote inference provider; config comes from PassthroughImplConfig.sample_run_config()
+        provider_id="passthrough",
+        provider_type="remote::passthrough",
+        config=PassthroughImplConfig.sample_run_config(),
+    )
+    embedding_provider = Provider(  # inline sentence-transformers provider; embedding_model below points at its provider_id
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+    vector_io_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),  # path argument is derived from the distribution name
+    )
+
+    default_models = [  # LLM entries, all registered against provider_id "passthrough"
+        ModelInput(
+            metadata={},
+            model_id="meta-llama/Llama-3.1-8B-Instruct",
+            provider_id="passthrough",
+            provider_model_id="llama3.1-8b-instruct",
+            model_type=ModelType.llm,
+        ),
+        ModelInput(
+            metadata={},
+            model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            provider_id="passthrough",
+            provider_model_id="llama3.2-11b-vision-instruct",
+            model_type=ModelType.llm,
+        ),
+    ]
+
+    embedding_model = ModelInput(  # appended to default_models in both run configs below
+        model_id="all-MiniLM-L6-v2",
+        provider_id="sentence-transformers",
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+    default_tool_groups = [  # passed unchanged to both run configs below
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::wolfram_alpha",
+            provider_id="wolfram-alpha",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
+
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="Use Passthrough hosted llama-stack endpoint for LLM inference",
+        container_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",  # doc template is resolved next to this module
+        providers=providers,
+        available_models_by_provider={  # same provider_model_id values as default_models above
+            "passthrough": [
+                ProviderModelEntry(
+                    provider_model_id="llama3.1-8b-instruct",
+                    model_type=ModelType.llm,
+                ),
+                ProviderModelEntry(
+                    provider_model_id="llama3.2-11b-vision-instruct",
+                    model_type=ModelType.llm,
+                ),
+            ],
+        },
+        run_configs={
+            "run.yaml": RunConfigSettings(  # base config: overrides inference and vector_io only
+                provider_overrides={
+                    "inference": [inference_provider, embedding_provider],
+                    "vector_io": [vector_io_provider],
+                },
+                default_models=default_models + [embedding_model],
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],  # no provider_id given here, unlike run-with-safety.yaml
+                default_tool_groups=default_tool_groups,
+            ),
+            "run-with-safety.yaml": RunConfigSettings(  # additionally overrides "safety" and registers three shields
+                provider_overrides={
+                    "inference": [
+                        inference_provider,
+                        embedding_provider,
+                    ],
+                    "vector_io": [vector_io_provider],
+                    "safety": [
+                        Provider(
+                            provider_id="llama-guard",
+                            provider_type="inline::llama-guard",
+                            config={},
+                        ),
+                        Provider(
+                            provider_id="llama-guard-vision",
+                            provider_type="inline::llama-guard",  # same provider_type as "llama-guard"; only the provider_id differs
+                            config={},
+                        ),
+                        Provider(
+                            provider_id="code-scanner",
+                            provider_type="inline::code-scanner",  # NOTE(review): not listed in providers["safety"] above; confirm the build includes it
+                            config={},
+                        ),
+                    ],
+                },
+                default_models=[  # same contents as default_models + [embedding_model] in run.yaml
+                    *default_models,
+                    embedding_model,
+                ],
+                default_shields=[  # each shield names one of the safety providers declared above
+                    ShieldInput(
+                        shield_id="meta-llama/Llama-Guard-3-8B",
+                        provider_id="llama-guard",
+                    ),
+                    ShieldInput(
+                        shield_id="meta-llama/Llama-Guard-3-11B-Vision",
+                        provider_id="llama-guard-vision",
+                    ),
+                    ShieldInput(
+                        shield_id="CodeScanner",
+                        provider_id="code-scanner",
+                    ),
+                ],
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={  # env var name -> (default value, description)
+            "LLAMA_STACK_PORT": (
+                "8321",
+                "Port for the Llama Stack distribution server",
+            ),
+            "PASSTHROUGH_API_KEY": (
+                "",  # empty default
+                "Passthrough API Key",
+            ),
+            "PASSTHROUGH_URL": (
+                "",  # empty default
+                "Passthrough URL",
+            ),
+        },
+    )
--- a/llama_stack/templates/passthrough/run-with-safety.yaml
+++ b/llama_stack/templates/passthrough/run-with-safety.yaml
@ -0,0 +1,155 @@
+version: '2'
+image_name: passthrough
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: passthrough
+    provider_type: remote::passthrough
+    config:
+      url: ${env.PASSTHROUGH_URL}
+      api_key: ${env.PASSTHROUGH_API_KEY}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  - provider_id: llama-guard-vision
+    provider_type: inline::llama-guard
+    config: {}
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
+models:
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: passthrough
+  provider_model_id: llama3.1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: passthrough
+  provider_model_id: llama3.2-11b-vision-instruct
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields:
+- shield_id: meta-llama/Llama-Guard-3-8B
+  provider_id: llama-guard
+- shield_id: meta-llama/Llama-Guard-3-11B-Vision
+  provider_id: llama-guard-vision
+- shield_id: CodeScanner
+  provider_id: code-scanner
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
+preprocessors: []
+server:
+  port: 8321
--- a/llama_stack/templates/passthrough/run.yaml
+++ b/llama_stack/templates/passthrough/run.yaml
@ -31,7 +31,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -44,20 +45,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -80,6 +93,10 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
+  - provider_id: wolfram-alpha
+    provider_type: remote::wolfram-alpha
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  - provider_id: code-interpreter
    provider_type: inline::code-interpreter
    config: {}
@ -89,16 +106,9 @@ providers:
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
-  preprocessing:
-  - provider_id: basic
-    provider_type: inline::basic
-    config: {}
-  - provider_id: simple_chunking
-    provider_type: inline::simple_chunking
-    config: {}
 metadata_store:
  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-llama}/registry.db
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
 models:
 - metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
@ -110,23 +120,26 @@ models:
  provider_id: passthrough
  provider_model_id: llama3.2-11b-vision-instruct
  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
 vector_dbs: []
 datasets: []
 scoring_fns: []
-eval_tasks: []
+benchmarks: []
 tool_groups:
 - toolgroup_id: builtin::websearch
  provider_id: tavily-search
+- toolgroup_id: builtin::wolfram_alpha
+  provider_id: wolfram-alpha
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
  provider_id: code-interpreter
-preprocessors:
- preprocessor_id: builtin::basic
-  provider_id: basic
- preprocessor_id: builtin::chunking
-  provider_id: simple_chunking
+preprocessors: []
 server:
  port: 8321
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@ -36,6 +36,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run \
+    --pull always \
    --runtime nvidia \
    --gpus $CUDA_VISIBLE_DEVICES \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
@ -48,6 +49,8 @@ docker run \
    --port $INFERENCE_PORT
 ```

+Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html).
+
 If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like:

 ```bash
@ -56,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run \
+    --pull always \
    --runtime nvidia \
    --gpus $CUDA_VISIBLE_DEVICES \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
@ -79,12 +83,16 @@ This method allows you to get started quickly without having to build the distri
 ```bash
 export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
-export LLAMA_STACK_PORT=5001
+export LLAMA_STACK_PORT=8321
+
+# You need a local checkout of llama-stack to run this; get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack

 docker run \
-  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
@ -103,7 +111,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 cd /path/to/llama-stack

 docker run \
-  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
@ -124,7 +132,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
 ```bash
 export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
-export LLAMA_STACK_PORT=5001
+export LLAMA_STACK_PORT=8321

 cd distributions/remote-vllm
 llama stack build --template remote-vllm --image-type conda
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -16,15 +16,17 @@ providers:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: vllm-safety
    provider_type: remote::vllm
    config:
      url: ${env.SAFETY_VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
@ -39,7 +41,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -51,14 +54,26 @@ providers:
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -74,7 +89,7 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
  tool_runtime:
@ -99,7 +114,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -16,9 +16,10 @@ providers:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
@ -33,7 +34,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,14 +47,26 @@ providers:
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -68,7 +82,7 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
  tool_runtime:
@ -93,7 +107,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@ -47,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
        provider_id="vllm-inference",
        provider_type="remote::vllm",
        config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL}",
+            url="${env.VLLM_URL:http://localhost:8000/v1}",
        ),
    )
    embedding_provider = Provider(
@ -149,7 +149,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "INFERENCE_MODEL": (
--- a/llama_stack/templates/sambanova/doc_template.md
+++ b/llama_stack/templates/sambanova/doc_template.md
@ -49,9 +49,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@ -38,7 +38,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -51,7 +52,7 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
  tool_runtime:
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@ -7,17 +7,19 @@
 from pathlib import Path

 from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
-from llama_stack.distribution.datatypes import (
-    Provider,
-    ShieldInput,
-    ToolGroupInput,
-)
+from llama_stack.distribution.datatypes import Provider, ShieldInput, ToolGroupInput
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
-from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.providers.remote.vector_io.pgvector.config import (
+    PGVectorVectorIOConfig,
+)
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -118,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMASTACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "SAMBANOVA_API_KEY": (
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@ -11,11 +11,14 @@ import jinja2
 import yaml
 from pydantic import BaseModel, Field

+from llama_stack.apis.datasets import DatasetPurpose
 from llama_stack.apis.models.models import ModelType
 from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput
 from llama_stack.distribution.datatypes import (
    Api,
+    BenchmarkInput,
    BuildConfig,
+    DatasetInput,
    DistributionSpec,
    ModelInput,
    Provider,
@ -29,7 +32,9 @@ from llama_stack.providers.utils.inference.model_registry import ProviderModelEn
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


-def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
+def get_model_registry(
+    available_models: Dict[str, List[ProviderModelEntry]],
+) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
@ -57,6 +62,8 @@ class RunConfigSettings(BaseModel):
    default_models: Optional[List[ModelInput]] = None
    default_shields: Optional[List[ShieldInput]] = None
    default_tool_groups: Optional[List[ToolGroupInput]] = None
+    default_datasets: Optional[List[DatasetInput]] = None
+    default_benchmarks: Optional[List[BenchmarkInput]] = None
    default_preprocessors: Optional[List[PreprocessorInput]] = None

    def run_config(
@ -115,6 +122,8 @@ class RunConfigSettings(BaseModel):
            models=self.default_models or [],
            shields=self.default_shields or [],
            tool_groups=self.default_tool_groups or [],
+            datasets=self.default_datasets or [],
+            benchmarks=self.default_benchmarks or [],
            preprocessors=self.default_preprocessors or [],
        )

@ -190,7 +199,7 @@ class DistributionTemplate(BaseModel):
                    default_models.append(
                        DefaultModel(
                            model_id=model_entry.provider_model_id,
-                            doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
+                            doc_string=(f"({' -- '.join(doc_parts)})" if doc_parts else ""),
                        )
                    )

@ -209,7 +218,9 @@ class DistributionTemplate(BaseModel):

        # Register YAML representer for ModelType
        yaml.add_representer(ModelType, enum_representer)
+        yaml.add_representer(DatasetPurpose, enum_representer)
        yaml.SafeDumper.add_representer(ModelType, enum_representer)
+        yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)

        for output_dir in [yaml_output_dir, doc_output_dir]:
            output_dir.mkdir(parents=True, exist_ok=True)
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@ -38,6 +38,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run --rm -it \
+  --pull always \
  -v $HOME/.cache/huggingface:/data \
  -p $INFERENCE_PORT:$INFERENCE_PORT \
  --gpus $CUDA_VISIBLE_DEVICES \
@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
  -v $HOME/.cache/huggingface:/data \
  -p $SAFETY_PORT:$SAFETY_PORT \
  --gpus $CUDA_VISIBLE_DEVICES \
@ -78,9 +80,10 @@ Now you are ready to run Llama Stack with TGI as the inference provider. You can
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
@ -97,6 +100,7 @@ cd /path/to/llama-stack

 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@ -31,7 +31,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -44,20 +45,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@ -143,7 +143,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "INFERENCE_MODEL": (
--- a/llama_stack/templates/together/doc_template.md
+++ b/llama_stack/templates/together/doc_template.md
@ -49,9 +49,10 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
 This method allows you to get started quickly without having to build the distribution code.

 ```bash
-LLAMA_STACK_PORT=5001
+LLAMA_STACK_PORT=8321
 docker run \
  -it \
+  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-{{ name }} \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@ -17,7 +17,7 @@ providers:
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
@ -51,20 +51,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -98,7 +110,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@ -17,7 +17,7 @@ providers:
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
-      api_key: ${env.TOGETHER_API_KEY}
+      api_key: ${env.TOGETHER_API_KEY:}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
@ -32,7 +32,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -45,20 +46,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -92,7 +105,8 @@ providers:
    config: {}
  - provider_id: wolfram-alpha
    provider_type: remote::wolfram-alpha
-    config: {}
+    config:
+      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
  preprocessing:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@ -20,7 +20,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)


 def get_distribution_template() -> DistributionTemplate:
@ -168,7 +172,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "TOGETHER_API_KEY": (
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@ -36,7 +36,8 @@ providers:
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
-    config: {}
+    config:
+      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -49,20 +50,32 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/vllm-gpu/trace_store.db}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
-    config: {}
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@ -113,7 +113,7 @@ def get_distribution_template() -> DistributionTemplate:
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (
-                "5001",
+                "8321",
                "Port for the Llama Stack distribution server",
            ),
            "INFERENCE_MODEL": (