Merge branch 'main' into feat/litellm_sambanova_usage

2025-12-28 06:41:59 +00:00 · 2025-05-05 11:49:58 -05:00 · 2025-05-05 11:49:58 -05:00 · b7f16ac7a6
commit b7f16ac7a6
parent daf0c26420 a4247ce0a8
535 changed files with 23539 additions and 8112 deletions
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@ -29,7 +29,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -55,10 +54,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/bedrock/build.yaml
+++ b/llama_stack/templates/bedrock/build.yaml
@ -26,7 +26,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@ -39,9 +39,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/bedrock/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -87,9 +87,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -140,7 +137,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/cerebras/build.yaml
+++ b/llama_stack/templates/cerebras/build.yaml
@ -27,6 +27,5 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
 image_type: conda
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
        ],
    }
@ -77,10 +76,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/cerebras/report.md
+++ b/llama_stack/templates/cerebras/report.md
@ -41,4 +41,3 @@
 |:-----|:-----|:-----|:-----|
 | /create_agent_turn | rag | test_rag_agent | ✅ |
 | /create_agent_turn | custom_tool | test_custom_tool | ❌ |
-| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@ -79,9 +79,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/trace_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -138,7 +135,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/ci-tests/build.yaml
+++ b/llama_stack/templates/ci-tests/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@ -40,7 +40,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -71,10 +70,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]
    available_models = {
        "fireworks": MODEL_ENTRIES,
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@ -42,9 +42,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ci-tests/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -90,9 +90,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -236,7 +233,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/dell/build.yaml
+++ b/llama_stack/templates/dell/build.yaml
@ -28,6 +28,5 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
 image_type: conda
--- a/llama_stack/templates/dell/dell.py
+++ b/llama_stack/templates/dell/dell.py
@ -30,7 +30,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
        ],
    }
@ -87,10 +86,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/dell/run-with-safety.yaml
+++ b/llama_stack/templates/dell/run-with-safety.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -127,7 +124,5 @@ tool_groups:
  provider_id: brave-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/dell/run.yaml
+++ b/llama_stack/templates/dell/run.yaml
@ -41,9 +41,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -89,9 +89,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -118,7 +115,5 @@ tool_groups:
  provider_id: brave-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/dependencies.json
+++ b/llama_stack/templates/dependencies.json
@ -344,6 +344,45 @@
    "sentence-transformers --no-deps",
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
  ],
+  "llama_api": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "emoji",
+    "fastapi",
+    "fire",
+    "httpx",
+    "langdetect",
+    "litellm",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "sqlite-vec",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
  "meta-reference-gpu": [
    "accelerate",
    "aiosqlite",
@ -394,12 +433,11 @@
    "aiosqlite",
    "blobfile",
    "chardet",
-    "emoji",
+    "datasets",
    "faiss-cpu",
    "fastapi",
    "fire",
    "httpx",
-    "langdetect",
    "matplotlib",
    "nltk",
    "numpy",
@ -411,7 +449,6 @@
    "psycopg2-binary",
    "pymongo",
    "pypdf",
-    "pythainlp",
    "redis",
    "requests",
    "scikit-learn",
@ -419,7 +456,6 @@
    "sentencepiece",
    "tqdm",
    "transformers",
-    "tree_sitter",
    "uvicorn"
  ],
  "ollama": [
@ -762,5 +798,41 @@
    "vllm",
    "sentence-transformers --no-deps",
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "watsonx": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "datasets",
+    "emoji",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "ibm_watson_machine_learning",
+    "langdetect",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pymongo",
+    "pypdf",
+    "pythainlp",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "tree_sitter",
+    "uvicorn"
  ]
 }
--- a/llama_stack/templates/dev/build.yaml
+++ b/llama_stack/templates/dev/build.yaml
@ -32,7 +32,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import List, Tuple

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@ -54,7 +53,7 @@ from llama_stack.templates.template import (
 )


-def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
+def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]:
    # in this template, we allow each API key to be optional
    providers = [
        (
@ -116,7 +115,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -159,10 +157,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@ -76,9 +76,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dev/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -124,9 +124,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -537,7 +534,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/fireworks/build.yaml
+++ b/llama_stack/templates/fireworks/build.yaml
@ -28,7 +28,6 @@ distribution_spec:
    - remote::brave-search
    - remote::tavily-search
    - remote::wolfram-alpha
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@ -40,7 +40,6 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::brave-search",
            "remote::tavily-search",
            "remote::wolfram-alpha",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -90,10 +89,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/fireworks/report.md
+++ b/llama_stack/templates/fireworks/report.md
@ -43,4 +43,3 @@
 |:-----|:-----|:-----|:-----|:-----|
 | inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
 | inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -102,9 +102,6 @@ providers:
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -255,7 +252,5 @@ tool_groups:
  provider_id: wolfram-alpha
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/fireworks/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -97,9 +97,6 @@ providers:
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -245,7 +242,5 @@ tool_groups:
  provider_id: wolfram-alpha
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/groq/build.yaml
+++ b/llama_stack/templates/groq/build.yaml
@ -24,6 +24,5 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
 image_type: conda
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@ -33,7 +33,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
        ],
    }
@ -72,10 +71,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/groq/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -203,7 +200,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/hf-endpoint/build.yaml
+++ b/llama_stack/templates/hf-endpoint/build.yaml
@ -26,7 +26,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@ -32,7 +32,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -79,10 +78,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml
+++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -98,9 +98,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -135,7 +132,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/hf-endpoint/run.yaml
+++ b/llama_stack/templates/hf-endpoint/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-endpoint/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -125,7 +122,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/hf-serverless/build.yaml
+++ b/llama_stack/templates/hf-serverless/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@ -32,7 +32,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -80,10 +79,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/hf-serverless/run-with-safety.yaml
+++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -98,9 +98,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -135,7 +132,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/hf-serverless/run.yaml
+++ b/llama_stack/templates/hf-serverless/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/hf-serverless/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -125,7 +122,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/llama_api/__init__.py
+++ b/llama_stack/templates/llama_api/__init__.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .llama_api import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/llama_api/build.yaml
+++ b/llama_stack/templates/llama_api/build.yaml
@ -0,0 +1,32 @@
+version: '2'
+distribution_spec:
+  description: Distribution for running e2e tests in CI
+  providers:
+    inference:
+    - remote::llama-openai-compat
+    - inline::sentence-transformers
+    vector_io:
+    - inline::sqlite-vec
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::rag-runtime
+    - remote::model-context-protocol
+image_type: conda
--- a/llama_stack/templates/llama_api/llama_api.py
+++ b/llama_stack/templates/llama_api/llama_api.py
@ -0,0 +1,153 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from llama_stack.apis.models.models import ModelType
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
+from llama_stack.providers.inline.inference.sentence_transformers import (
+    SentenceTransformersInferenceConfig,
+)
+from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
+    SQLiteVectorIOConfig,
+)
+from llama_stack.providers.remote.inference.llama_openai_compat.config import (
+    LlamaCompatConfig,
+)
+from llama_stack.providers.remote.inference.llama_openai_compat.models import (
+    MODEL_ENTRIES as LLLAMA_MODEL_ENTRIES,
+)
+from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
+from llama_stack.providers.remote.vector_io.pgvector.config import (
+    PGVectorVectorIOConfig,
+)
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    get_model_registry,
+)
+
+
+def get_inference_providers() -> tuple[list[Provider], dict[str, list]]:
+    """Build the remote inference providers offered by this template.
+
+    Returns:
+        A ``(inference_providers, available_models)`` pair.
+        ``inference_providers`` holds one ``Provider`` per row of the local
+        ``providers`` table, with ``provider_type`` set to
+        ``remote::<provider_id>``. ``available_models`` maps each
+        ``provider_id`` to the model entries registered for that provider.
+    """
+    # in this template, we allow each API key to be optional
+    # Each row is (provider_id, model entries, sample run config).
+    providers = [
+        (
+            "llama-openai-compat",
+            LLLAMA_MODEL_ENTRIES,
+            LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:}"),
+        ),
+    ]
+    inference_providers = []
+    available_models = {}
+    for provider_id, model_entries, config in providers:
+        inference_providers.append(
+            Provider(
+                provider_id=provider_id,
+                provider_type=f"remote::{provider_id}",
+                config=config,
+            )
+        )
+        # Keyed by provider_id so callers can build a per-provider model registry.
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
+
+
+def get_distribution_template() -> DistributionTemplate:
+    """Assemble the ``llama_api`` distribution template.
+
+    Combines the remote inference providers from ``get_inference_providers()``
+    with an inline sentence-transformers embedding provider, three vector_io
+    providers, and the default tool groups, models and shield, and returns
+    them as a ``DistributionTemplate`` with a single ``run.yaml`` run config.
+
+    Returns:
+        The ``DistributionTemplate`` describing this distribution.
+    """
+    inference_providers, available_models = get_inference_providers()
+    # Provider types offered per API. The inference list is the remote
+    # providers built above plus the inline embedding provider.
+    providers = {
+        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
+        "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }
+    # Also used below to build the sqlite-vec storage directory path.
+    name = "llama_api"
+
+    # NOTE(review): the chromadb/pgvector provider_ids use the
+    # "${env.VAR+value}" form; presumably they only resolve to a usable id
+    # when the ENABLE_* variable is set -- confirm against the env
+    # substitution logic.
+    vector_io_providers = [
+        Provider(
+            provider_id="sqlite-vec",
+            provider_type="inline::sqlite-vec",
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+        ),
+        Provider(
+            provider_id="${env.ENABLE_CHROMADB+chromadb}",
+            provider_type="remote::chromadb",
+            config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:}"),
+        ),
+        Provider(
+            provider_id="${env.ENABLE_PGVECTOR+pgvector}",
+            provider_type="remote::pgvector",
+            config=PGVectorVectorIOConfig.sample_run_config(
+                db="${env.PGVECTOR_DB:}",
+                user="${env.PGVECTOR_USER:}",
+                password="${env.PGVECTOR_PASSWORD:}",
+            ),
+        ),
+    ]
+    # Inline provider that backs the embedding model registered below.
+    embedding_provider = Provider(
+        provider_id="sentence-transformers",
+        provider_type="inline::sentence-transformers",
+        config=SentenceTransformersInferenceConfig.sample_run_config(),
+    )
+
+    # Tool groups registered by default: web search via the tavily-search
+    # provider and RAG via the rag-runtime provider.
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+    ]
+    embedding_model = ModelInput(
+        model_id="all-MiniLM-L6-v2",
+        provider_id=embedding_provider.provider_id,
+        model_type=ModelType.embedding,
+        metadata={
+            "embedding_dimension": 384,
+        },
+    )
+
+    # Model inputs derived from the per-provider model entries.
+    default_models = get_model_registry(available_models)
+    # NOTE(review): the description below reads like it was copied from a CI
+    # test template; confirm it is the intended text for llama_api.
+    return DistributionTemplate(
+        name=name,
+        distro_type="self_hosted",
+        description="Distribution for running e2e tests in CI",
+        container_image=None,
+        template_path=None,
+        providers=providers,
+        available_models_by_provider=available_models,
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                # Only inference and vector_io are overridden explicitly here.
+                provider_overrides={
+                    "inference": inference_providers + [embedding_provider],
+                    "vector_io": vector_io_providers,
+                },
+                default_models=default_models + [embedding_model],
+                default_tool_groups=default_tool_groups,
+                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+            ),
+        },
+        # Each entry maps an env var name to a (default value, description) pair.
+        run_config_env_vars={
+            "LLAMA_STACK_PORT": (
+                "8321",
+                "Port for the Llama Stack distribution server",
+            ),
+        },
+    )
--- a/llama_stack/templates/llama_api/run.yaml
+++ b/llama_stack/templates/llama_api/run.yaml
@ -0,0 +1,162 @@
+version: '2'
+image_name: llama_api
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: llama-openai-compat
+    provider_type: remote::llama-openai-compat
+    config:
+      openai_compat_api_base: https://api.llama.com/compat/v1/
+      api_key: ${env.LLAMA_API_KEY:}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  vector_io:
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/sqlite_vec.db
+  - provider_id: ${env.ENABLE_CHROMADB+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:}
+  - provider_id: ${env.ENABLE_PGVECTOR+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:localhost}
+      port: ${env.PGVECTOR_PORT:5432}
+      db: ${env.PGVECTOR_DB:}
+      user: ${env.PGVECTOR_USER:}
+      password: ${env.PGVECTOR_PASSWORD:}
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/trace_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/llama_api}/registry.db
+models:
+- metadata: {}
+  model_id: Llama-3.3-70B-Instruct
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-3.3-70B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-3.3-70B-Instruct
+  model_type: llm
+- metadata: {}
+  model_id: Llama-4-Scout-17B-16E-Instruct-FP8
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+  provider_id: llama-openai-compat
+  provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8
+  model_type: llm
+- metadata:
+    embedding_dimension: 384
+  model_id: all-MiniLM-L6-v2
+  provider_id: sentence-transformers
+  model_type: embedding
+shields:
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
--- a/llama_stack/templates/meta-reference-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-gpu/build.yaml
@ -26,7 +26,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@ -69,6 +69,7 @@ LLAMA_STACK_PORT=8321
 docker run \
  -it \
  --pull always \
+  --gpus all \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
  -it \
  --pull always \
+  --gpus all \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-{{ name }} \
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@ -36,7 +36,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -86,10 +85,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml
@ -60,9 +60,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -108,9 +108,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -145,7 +142,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/meta-reference-gpu/run.yaml
+++ b/llama_stack/templates/meta-reference-gpu/run.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/meta-reference-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -98,9 +98,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -130,7 +127,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@ -1,6 +1,6 @@
 version: '2'
 distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference and safety
+  description: Use NVIDIA NIM for running LLM inference, evaluation and safety
  providers:
    inference:
    - remote::nvidia
@ -13,11 +13,12 @@ distribution_spec:
    telemetry:
    - inline::meta-reference
    eval:
-    - inline::meta-reference
+    - remote::nvidia
    post_training:
    - remote::nvidia
    datasetio:
    - inline::localfs
+    - remote::nvidia
    scoring:
    - inline::basic
    tool_runtime:
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@ -25,14 +25,84 @@ The following models are available by default:
 {% endif %}


-### Prerequisite: API Keys
+## Prerequisites
+### NVIDIA API Keys

-Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
+Make sure you have access to an NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable.

+### Deploy NeMo Microservices Platform
+The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform.
+
+## Supported Services
+Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints.
+
+### Inference: NVIDIA NIM
+NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs:
+  1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key)
+  2. Self-hosted: NVIDIA NIMs that run on your own infrastructure.
+
+The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment.
+
+### Datasetio API: NeMo Data Store
+The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.
+
+See the [NVIDIA Datasetio docs](/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.
+
+### Eval API: NeMo Evaluator
+The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Eval docs](/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.
+
+### Post-Training API: NeMo Customizer
+The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the [NVIDIA Post-Training docs](/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
+
+### Safety API: NeMo Guardrails
+The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint.
+
+See the NVIDIA Safety docs for supported features and example usage.
+
+## Deploying models
+In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`.
+
+Note: For improved inference speeds, we need to use NIM with the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart.
+```sh
+# URL to NeMo NIM Proxy service
+export NEMO_URL="http://nemo.test"
+
+curl --location "$NEMO_URL/v1/deployment/model-deployments" \
+   -H 'accept: application/json' \
+   -H 'Content-Type: application/json' \
+   -d '{
+      "name": "llama-3.2-1b-instruct",
+      "namespace": "meta",
+      "config": {
+         "model": "meta/llama-3.2-1b-instruct",
+         "nim_deployment": {
+            "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct",
+            "image_tag": "1.8.3",
+            "pvc_size": "25Gi",
+            "gpu": 1,
+            "additional_envs": {
+               "NIM_GUIDED_DECODING_BACKEND": "fast_outlines"
+            }
+         }
+      }
+   }'
+```
+This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference.
+
+You can also remove a deployed NIM to free up GPU resources, if needed.
+```sh
+export NEMO_URL="http://nemo.test"
+
+curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct"
+```

 ## Running Llama Stack with NVIDIA

-You can do this via Conda (build code) or Docker which has a pre-built image.
+You can do this via Conda or venv (build code), or Docker which has a pre-built image.

 ### Via Docker

@ -54,9 +124,23 @@ docker run \
 ### Via Conda

 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --template nvidia --image-type conda
 llama stack run ./run.yaml \
  --port 8321 \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  --env INFERENCE_MODEL=$INFERENCE_MODEL
+```
+
+### Via venv
+
+If you've set up your local development environment, you can also build the image using your local virtual environment.
+
+```bash
+INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
+llama stack build --template nvidia --image-type venv
+llama stack run ./run.yaml \
+  --port 8321 \
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
  --env INFERENCE_MODEL=$INFERENCE_MODEL
 ```
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@ -7,6 +7,8 @@
 from pathlib import Path

 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
+from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
+from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
@ -20,9 +22,9 @@ def get_distribution_template() -> DistributionTemplate:
        "safety": ["remote::nvidia"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
-        "eval": ["inline::meta-reference"],
+        "eval": ["remote::nvidia"],
        "post_training": ["remote::nvidia"],
-        "datasetio": ["inline::localfs"],
+        "datasetio": ["inline::localfs", "remote::nvidia"],
        "scoring": ["inline::basic"],
        "tool_runtime": ["inline::rag-runtime"],
    }
@ -37,6 +39,16 @@ def get_distribution_template() -> DistributionTemplate:
        provider_type="remote::nvidia",
        config=NVIDIASafetyConfig.sample_run_config(),
    )
+    datasetio_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NvidiaDatasetIOConfig.sample_run_config(),
+    )
+    eval_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NVIDIAEvalConfig.sample_run_config(),
+    )
    inference_model = ModelInput(
        model_id="${env.INFERENCE_MODEL}",
        provider_id="nvidia",
@ -59,8 +71,8 @@ def get_distribution_template() -> DistributionTemplate:
    default_models = get_model_registry(available_models)
    return DistributionTemplate(
        name="nvidia",
-        distro_type="remote_hosted",
-        description="Use NVIDIA NIM for running LLM inference and safety",
+        distro_type="self_hosted",
+        description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
@ -69,6 +81,8 @@ def get_distribution_template() -> DistributionTemplate:
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
+                    "datasetio": [datasetio_provider],
+                    "eval": [eval_provider],
                },
                default_models=default_models,
                default_tool_groups=default_tool_groups,
@ -78,7 +92,8 @@ def get_distribution_template() -> DistributionTemplate:
                    "inference": [
                        inference_provider,
                        safety_provider,
-                    ]
+                    ],
+                    "eval": [eval_provider],
                },
                default_models=[inference_model, safety_model],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
@ -90,19 +105,15 @@ def get_distribution_template() -> DistributionTemplate:
                "",
                "NVIDIA API Key",
            ),
-            ## Nemo Customizer related variables
-            "NVIDIA_USER_ID": (
-                "llama-stack-user",
-                "NVIDIA User ID",
+            "NVIDIA_APPEND_API_VERSION": (
+                "True",
+                "Whether to append the API version to the base_url",
            ),
+            ## Nemo Customizer related variables
            "NVIDIA_DATASET_NAMESPACE": (
                "default",
                "NVIDIA Dataset Namespace",
            ),
-            "NVIDIA_ACCESS_POLICIES": (
-                "{}",
-                "NVIDIA Access Policies",
-            ),
            "NVIDIA_PROJECT_ID": (
                "test-project",
                "NVIDIA Project ID",
@ -119,6 +130,10 @@ def get_distribution_template() -> DistributionTemplate:
                "http://0.0.0.0:7331",
                "URL for the NeMo Guardrails Service",
            ),
+            "NVIDIA_EVALUATOR_URL": (
+                "http://0.0.0.0:7331",
+                "URL for the NeMo Evaluator Service",
+            ),
            "INFERENCE_MODEL": (
                "Llama3.1-8B-Instruct",
                "Inference model",
--- a/llama_stack/templates/nvidia/run-with-safety.yaml
+++ b/llama_stack/templates/nvidia/run-with-safety.yaml
@ -18,6 +18,7 @@ providers:
    config:
      url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
      api_key: ${env.NVIDIA_API_KEY:}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
  - provider_id: nvidia
    provider_type: remote::nvidia
    config:
@ -49,17 +50,14 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
+  - provider_id: nvidia
+    provider_type: remote::nvidia
    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
  post_training:
  - provider_id: nvidia
    provider_type: remote::nvidia
@ -76,6 +74,13 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default}
+      project_id: ${env.NVIDIA_PROJECT_ID:test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@ -18,6 +18,7 @@ providers:
    config:
      url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com}
      api_key: ${env.NVIDIA_API_KEY:}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:True}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
@ -44,17 +45,14 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/trace_store.db
  eval:
-  - provider_id: meta-reference
-    provider_type: inline::meta-reference
+  - provider_id: nvidia
+    provider_type: remote::nvidia
    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}
  post_training:
  - provider_id: nvidia
    provider_type: remote::nvidia
@ -64,13 +62,13 @@ providers:
      project_id: ${env.NVIDIA_PROJECT_ID:test-project}
      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:http://nemo.test}
  datasetio:
-  - provider_id: localfs
-    provider_type: inline::localfs
+  - provider_id: nvidia
+    provider_type: remote::nvidia
    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
+      api_key: ${env.NVIDIA_API_KEY:}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:default}
+      project_id: ${env.NVIDIA_PROJECT_ID:test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:http://nemo.test}
  scoring:
  - provider_id: basic
    provider_type: inline::basic
@ -173,6 +171,16 @@ models:
  provider_id: nvidia
  provider_model_id: meta/llama-3.2-90b-vision-instruct
  model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.3-70b-instruct
+  model_type: llm
 - metadata:
    embedding_dimension: 2048
    context_length: 8192
--- a/llama_stack/templates/ollama/build.yaml
+++ b/llama_stack/templates/ollama/build.yaml
@ -26,7 +26,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
    - remote::wolfram-alpha
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@ -31,7 +31,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
            "remote::wolfram-alpha",
@ -75,10 +74,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
        ToolGroupInput(
            toolgroup_id="builtin::wolfram_alpha",
            provider_id="wolfram-alpha",
--- a/llama_stack/templates/ollama/report.md
+++ b/llama_stack/templates/ollama/report.md
@ -41,4 +41,3 @@
 |:-----|:-----|:-----|:-----|
 | /create_agent_turn | rag | test_rag_agent | ✅ |
 | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
--- a/llama_stack/templates/ollama/run-with-safety.yaml
+++ b/llama_stack/templates/ollama/run-with-safety.yaml
@ -43,9 +43,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -91,9 +91,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -136,8 +133,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@ -41,9 +41,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/ollama/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -89,9 +89,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -126,8 +123,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/open-benchmark/build.yaml
+++ b/llama_stack/templates/open-benchmark/build.yaml
@ -30,7 +30,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict, List, Tuple

 from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
 from llama_stack.apis.models.models import ModelType
@ -36,7 +35,7 @@ from llama_stack.templates.template import (
 )


-def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
+def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
    # in this template, we allow each API key to be optional
    providers = [
        (
@ -108,7 +107,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -146,10 +144,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    default_models = get_model_registry(available_models) + [
--- a/llama_stack/templates/open-benchmark/run.yaml
+++ b/llama_stack/templates/open-benchmark/run.yaml
@ -68,9 +68,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/open-benchmark/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/open-benchmark}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -116,9 +116,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -242,7 +239,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/passthrough/build.yaml
+++ b/llama_stack/templates/passthrough/build.yaml
@ -28,7 +28,6 @@ distribution_spec:
    - remote::brave-search
    - remote::tavily-search
    - remote::wolfram-alpha
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/passthrough/passthrough.py
+++ b/llama_stack/templates/passthrough/passthrough.py
@ -38,7 +38,6 @@ def get_distribution_template() -> DistributionTemplate:
            "remote::brave-search",
            "remote::tavily-search",
            "remote::wolfram-alpha",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -100,10 +99,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/passthrough/run-with-safety.yaml
+++ b/llama_stack/templates/passthrough/run-with-safety.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -102,9 +102,6 @@ providers:
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -148,7 +145,5 @@ tool_groups:
  provider_id: wolfram-alpha
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/passthrough/run.yaml
+++ b/llama_stack/templates/passthrough/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -97,9 +97,6 @@ providers:
    provider_type: remote::wolfram-alpha
    config:
      api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -138,7 +135,5 @@ tool_groups:
  provider_id: wolfram-alpha
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/remote-vllm/build.yaml
+++ b/llama_stack/templates/remote-vllm/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
    - remote::wolfram-alpha
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@ -28,10 +28,10 @@ The following environment variables can be configured:

 ## Setting up vLLM server

-In the following sections, we'll use either AMD and NVIDIA GPUs to serve as hardware accelerators for the vLLM
+In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM
 server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also
 [supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and
-that we only use GPUs here for demonstration purposes.
+that we only use GPUs here for demonstration purposes. If you run into issues, you can add `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) to the `docker run` command to log the API server's responses for debugging.

 ### Setting up vLLM server on AMD GPU

@ -149,6 +149,55 @@ docker run \
    --port $SAFETY_PORT
 ```

+### Setting up vLLM server on Intel GPU
+
+Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to the vLLM-side setup, which guides you towards installing vLLM from source or self-building a vLLM Docker container, Intel provides a prebuilt vLLM container for use on systems with Intel GPUs supported by the PyTorch XPU backend:
+- [intel/vllm](https://hub.docker.com/r/intel/vllm)
+
+Here is a sample script to start a vLLM server locally via Docker using the Intel-provided container:
+
+```bash
+export INFERENCE_PORT=8000
+export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
+export ZE_AFFINITY_MASK=0
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $INFERENCE_PORT:$INFERENCE_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $INFERENCE_MODEL \
+    --port $INFERENCE_PORT
+```
+
+If you are using Llama Stack Safety / Shield APIs, then you will also need to run another instance of vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like:
+
+```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+export ZE_AFFINITY_MASK=1
+
+docker run \
+    --pull always \
+    --device /dev/dri \
+    -v /dev/dri/by-path:/dev/dri/by-path \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+    --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \
+    -p $SAFETY_PORT:$SAFETY_PORT \
+    --ipc=host \
+    intel/vllm:xpu \
+    --gpu-memory-utilization 0.7 \
+    --model $SAFETY_MODEL \
+    --port $SAFETY_PORT
+```
+
 ## Running Llama Stack

 Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image.
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -88,9 +88,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -102,9 +102,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -143,8 +140,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -81,9 +81,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/trace_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -95,9 +95,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -131,8 +128,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
            "remote::wolfram-alpha",
@ -84,10 +83,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
        ToolGroupInput(
            toolgroup_id="builtin::wolfram_alpha",
            provider_id="wolfram-alpha",
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@ -18,7 +18,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
    - remote::wolfram-alpha
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@ -54,9 +54,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/trace_store.db
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
@ -68,9 +68,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@ -40,7 +40,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
            "remote::wolfram-alpha",
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Tuple
+from typing import Literal

 import jinja2
 import yaml
@ -32,8 +32,8 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


 def get_model_registry(
-    available_models: Dict[str, List[ProviderModelEntry]],
-) -> List[ModelInput]:
+    available_models: dict[str, list[ProviderModelEntry]],
+) -> list[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
@ -57,18 +57,18 @@ class DefaultModel(BaseModel):


 class RunConfigSettings(BaseModel):
-    provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
-    default_models: Optional[List[ModelInput]] = None
-    default_shields: Optional[List[ShieldInput]] = None
-    default_tool_groups: Optional[List[ToolGroupInput]] = None
-    default_datasets: Optional[List[DatasetInput]] = None
-    default_benchmarks: Optional[List[BenchmarkInput]] = None
+    provider_overrides: dict[str, list[Provider]] = Field(default_factory=dict)
+    default_models: list[ModelInput] | None = None
+    default_shields: list[ShieldInput] | None = None
+    default_tool_groups: list[ToolGroupInput] | None = None
+    default_datasets: list[DatasetInput] | None = None
+    default_benchmarks: list[BenchmarkInput] | None = None

    def run_config(
        self,
        name: str,
-        providers: Dict[str, List[str]],
-        container_image: Optional[str] = None,
+        providers: dict[str, list[str]],
+        container_image: str | None = None,
    ) -> StackRunConfig:
        provider_registry = get_provider_registry()

@ -135,15 +135,15 @@ class DistributionTemplate(BaseModel):
    description: str
    distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]

-    providers: Dict[str, List[str]]
-    run_configs: Dict[str, RunConfigSettings]
-    template_path: Optional[Path] = None
+    providers: dict[str, list[str]]
+    run_configs: dict[str, RunConfigSettings]
+    template_path: Path | None = None

    # Optional configuration
-    run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
-    container_image: Optional[str] = None
+    run_config_env_vars: dict[str, tuple[str, str]] | None = None
+    container_image: str | None = None

-    available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
+    available_models_by_provider: dict[str, list[ProviderModelEntry]] | None = None

    def build_config(self) -> BuildConfig:
        return BuildConfig(
--- a/llama_stack/templates/tgi/build.yaml
+++ b/llama_stack/templates/tgi/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/tgi/run-with-safety.yaml
+++ b/llama_stack/templates/tgi/run-with-safety.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -125,7 +122,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/tgi/run.yaml
+++ b/llama_stack/templates/tgi/run.yaml
@ -44,9 +44,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/tgi/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -92,9 +92,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -124,7 +121,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@ -34,7 +34,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -83,10 +82,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
    - remote::wolfram-alpha
--- a/llama_stack/templates/together/report.md
+++ b/llama_stack/templates/together/report.md
@ -43,4 +43,3 @@
 |:-----|:-----|:-----|:-----|:-----|
 | inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ |
 | inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ |
-| inline::meta-reference | /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ |
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@ -50,9 +50,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -98,9 +98,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -270,8 +267,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@ -45,9 +45,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/together/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -93,9 +93,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -260,8 +257,6 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 - toolgroup_id: builtin::wolfram_alpha
  provider_id: wolfram-alpha
 server:
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@ -39,7 +39,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
            "remote::wolfram-alpha",
@ -74,10 +73,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
        ToolGroupInput(
            toolgroup_id="builtin::wolfram_alpha",
            provider_id="wolfram-alpha",
--- a/llama_stack/templates/verification/build.yaml
+++ b/llama_stack/templates/verification/build.yaml
@ -32,7 +32,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/verification/run.yaml
+++ b/llama_stack/templates/verification/run.yaml
@ -78,9 +78,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/verification/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/verification}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -126,9 +126,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -640,7 +637,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/verification/verification.py
+++ b/llama_stack/templates/verification/verification.py
@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict, List, Tuple

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@ -51,7 +50,7 @@ from llama_stack.templates.template import (
 )


-def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
+def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
    # in this template, we allow each API key to be optional
    providers = [
        (
@ -113,7 +112,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -156,10 +154,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]
    embedding_model = ModelInput(
        model_id="all-MiniLM-L6-v2",
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@ -27,7 +27,6 @@ distribution_spec:
    tool_runtime:
    - remote::brave-search
    - remote::tavily-search
-    - inline::code-interpreter
    - inline::rag-runtime
    - remote::model-context-protocol
 image_type: conda
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@ -49,9 +49,9 @@ providers:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
-      service_name: "${env.OTEL_SERVICE_NAME:\u200B}"
+      service_name: ${env.OTEL_SERVICE_NAME:}
      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
-      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/vllm-gpu/trace_store.db}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/trace_store.db
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
@ -97,9 +97,6 @@ providers:
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:}
      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
    config: {}
@ -129,7 +126,5 @@ tool_groups:
  provider_id: tavily-search
 - toolgroup_id: builtin::rag
  provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
  port: 8321
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@ -31,7 +31,6 @@ def get_distribution_template() -> DistributionTemplate:
        "tool_runtime": [
            "remote::brave-search",
            "remote::tavily-search",
-            "inline::code-interpreter",
            "inline::rag-runtime",
            "remote::model-context-protocol",
        ],
@ -75,10 +74,6 @@ def get_distribution_template() -> DistributionTemplate:
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
-        ToolGroupInput(
-            toolgroup_id="builtin::code_interpreter",
-            provider_id="code-interpreter",
-        ),
    ]

    return DistributionTemplate(
--- a/llama_stack/templates/watsonx/__init__.py
+++ b/llama_stack/templates/watsonx/__init__.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .watsonx import get_distribution_template  # noqa: F401
--- a/llama_stack/templates/watsonx/build.yaml
+++ b/llama_stack/templates/watsonx/build.yaml
@ -0,0 +1,29 @@
+version: '2'
+distribution_spec:
+  description: Use watsonx for running LLM inference
+  providers:
+    inference:
+    - remote::watsonx
+    vector_io:
+    - inline::faiss
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::rag-runtime
+    - remote::model-context-protocol
+image_type: conda
--- a/llama_stack/templates/watsonx/doc_template.md
+++ b/llama_stack/templates/watsonx/doc_template.md
@ -0,0 +1,74 @@
+---
+orphan: true
+---
+# watsonx Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+{% if run_config_env_vars  %}
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+- `{{ model.model_id }} {{ model.doc_string }}`
+{% endfor %}
+{% endif %}
+
+
+### Prerequisite: API Keys
+
+Make sure you have access to a watsonx API key. You can get one by following the [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key) documentation.
+
+
+## Running Llama Stack with watsonx
+
+You can do this via Conda (which builds the code), venv, or Docker (which has a pre-built image).
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ./run.yaml:/root/my-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --yaml-config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env WATSONX_API_KEY=$WATSONX_API_KEY \
+  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
+  --env WATSONX_BASE_URL=$WATSONX_BASE_URL
+```
+
+### Via Conda
+
+```bash
+llama stack build --template watsonx --image-type conda
+llama stack run ./run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env WATSONX_API_KEY=$WATSONX_API_KEY \
+  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID
+```
--- a/llama_stack/templates/watsonx/run.yaml
+++ b/llama_stack/templates/watsonx/run.yaml
@ -0,0 +1,205 @@
+version: '2'
+image_name: watsonx
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- telemetry
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: watsonx
+    provider_type: remote::watsonx
+    config:
+      url: ${env.WATSONX_BASE_URL:https://us-south.ml.cloud.ibm.com}
+      api_key: ${env.WATSONX_API_KEY:}
+      project_id: ${env.WATSONX_PROJECT_ID:}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/trace_store.db
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/meta_reference_eval.db
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/huggingface_datasetio.db
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/watsonx}/registry.db
+models:
+- metadata: {}
+  model_id: meta-llama/llama-3-3-70b-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-2-13b-chat
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-2-13b-chat
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-2-13b
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-2-13b-chat
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-1-70b-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-70B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-1-8b-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.1-8B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-2-11b-vision-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-2-1b-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-1B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-2-3b-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-3-2-90b-vision-instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/llama-guard-3-11b-vision
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-guard-3-11b-vision
+  model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision
+  provider_id: watsonx
+  provider_model_id: meta-llama/llama-guard-3-11b-vision
+  model_type: llm
+shields: []
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
--- a/llama_stack/templates/watsonx/watsonx.py
+++ b/llama_stack/templates/watsonx/watsonx.py
@ -0,0 +1,85 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
+from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
+from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
+
+
+def get_distribution_template() -> DistributionTemplate:
+    """Build the distribution template for the ``watsonx`` distribution.
+
+    The template uses the ``remote::watsonx`` provider for inference and
+    registers two default tool groups: ``builtin::websearch`` (served by
+    ``tavily-search``) and ``builtin::rag`` (served by ``rag-runtime``).
+    The default models are derived from the watsonx ``MODEL_ENTRIES`` via
+    ``get_model_registry``.
+
+    Returns:
+        A ``DistributionTemplate`` with a single ``run.yaml`` run
+        configuration and the environment variables documented for it.
+    """
+    # Provider types available to this distribution, keyed by API name.
+    providers = {
+        "inference": ["remote::watsonx"],
+        "vector_io": ["inline::faiss"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::rag-runtime",
+            "remote::model-context-protocol",
+        ],
+    }
+
+    # Explicit inference provider entry; overrides the default generated
+    # for the "inference" API in run.yaml.
+    inference_provider = Provider(
+        provider_id="watsonx",
+        provider_type="remote::watsonx",
+        config=WatsonXConfig.sample_run_config(),
+    )
+
+    available_models = {
+        "watsonx": MODEL_ENTRIES,
+    }
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::rag",
+            provider_id="rag-runtime",
+        ),
+    ]
+
+    default_models = get_model_registry(available_models)
+    return DistributionTemplate(
+        name="watsonx",
+        distro_type="remote_hosted",
+        description="Use watsonx for running LLM inference",
+        container_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        available_models_by_provider=available_models,
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                },
+                default_models=default_models,
+                default_tool_groups=default_tool_groups,
+            ),
+        },
+        run_config_env_vars={
+            # Spelled LLAMA_STACK_PORT so the documented variable matches the
+            # one used by the run commands in doc_template.md.
+            "LLAMA_STACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "WATSONX_API_KEY": (
+                "",
+                "watsonx API Key",
+            ),
+            "WATSONX_PROJECT_ID": (
+                "",
+                "watsonx Project ID",
+            ),
+        },
+    )