diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 32e221128..ceafa96db 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -43,14 +43,17 @@ jobs: - name: Build Llama Stack run: | - uv run llama stack build --template ollama --image-type venv + uv run llama stack build --template starter --image-type venv - name: Start Llama Stack server in background if: matrix.client-type == 'http' env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + OLLAMA_INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + ENABLE_OLLAMA: "ollama" + OLLAMA_URL: "http://0.0.0.0:11434" run: | - LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" & + LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/starter/run.yaml --image-type venv & - name: Wait for Llama Stack server to be ready if: matrix.client-type == 'http' @@ -87,16 +90,18 @@ jobs: - name: Run Integration Tests env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" + OLLAMA_INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" # for library tests + ENABLE_OLLAMA: "ollama" # for library tests OLLAMA_URL: "http://0.0.0.0:11434" run: | if [ "${{ matrix.client-type }}" == "library" ]; then - stack_config="ollama" + stack_config="starter" else stack_config="http://localhost:8321" fi uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ - --text-model="meta-llama/Llama-3.2-3B-Instruct" \ + --text-model="ollama/meta-llama/Llama-3.2-3B-Instruct" \ --embedding-model=all-MiniLM-L6-v2 - name: Check Storage and Memory Available After Tests diff --git a/README.md b/README.md index 7f34c3340..dc30a9cd2 100644 --- a/README.md +++ b/README.md @@ -139,13 +139,7 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider | **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | -| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | -| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | -| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | -| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | 
[Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | -| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) | -| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | -| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | +| Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/starter.html) | ### Documentation diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 521071cc6..8df808b54 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -85,45 +85,13 @@ The following command will allow you to see the available templates and their co llama stack build --list-templates ``` -``` -------------------------------+-----------------------------------------------------------------------------+ -| Template Name | Description | -+------------------------------+-----------------------------------------------------------------------------+ -| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| together | Use Together.AI for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| vllm-gpu | Use a built-in vLLM engine for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| experimental-post-training | Experimental template for post training | -+------------------------------+-----------------------------------------------------------------------------+ -| remote-vllm | Use (an external) vLLM server for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| fireworks | Use Fireworks.AI for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| tgi | Use (an external) TGI server for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| bedrock | Use AWS Bedrock for running LLM inference and safety | -+------------------------------+-----------------------------------------------------------------------------+ -| meta-reference-gpu | Use Meta Reference for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| nvidia | Use NVIDIA NIM for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| cerebras | Use 
Cerebras for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| ollama | Use (an external) Ollama server for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference | -+------------------------------+-----------------------------------------------------------------------------+ -``` - You may then pick a template to build your distribution with providers fitted to your liking. For example, to build a distribution with TGI as the inference provider, you can run: ``` -$ llama stack build --template tgi +$ llama stack build --template starter ... -You can now edit ~/.llama/distributions/llamastack-tgi/tgi-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-tgi/tgi-run.yaml` +You can now edit ~/.llama/distributions/llamastack-starter/starter-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-starter/starter-run.yaml` ``` ::: :::{tab-item} Building from Scratch @@ -163,26 +131,7 @@ You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack - The config file will be of contents like the ones in `llama_stack/templates/*build.yaml`. ``` -$ cat llama_stack/templates/ollama/build.yaml - -name: ollama -distribution_spec: - description: Like local, but use ollama for running LLM inference - providers: - inference: remote::ollama - memory: inline::faiss - safety: inline::llama-guard - agents: inline::meta-reference - telemetry: inline::meta-reference -image_name: ollama -image_type: conda - -# If some providers are external, you can specify the path to the implementation -external_providers_dir: ~/.llama/providers.d -``` - -``` -llama stack build --config llama_stack/templates/ollama/build.yaml +llama stack build --config llama_stack/templates/starter/build.yaml ``` ::: @@ -248,11 +197,11 @@ Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podm To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type. ``` -llama stack build --template ollama --image-type container +llama stack build --template starter --image-type container ``` ``` -$ llama stack build --template ollama --image-type container +$ llama stack build --template starter --image-type container ... Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/ContainerfileFROM python:3.10-slim ... diff --git a/docs/source/distributions/importing_as_library.md b/docs/source/distributions/importing_as_library.md index 967a18b54..fe82d2db5 100644 --- a/docs/source/distributions/importing_as_library.md +++ b/docs/source/distributions/importing_as_library.md @@ -6,7 +6,7 @@ This avoids the overhead of setting up a server. 
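Once the setup below has been run (note the template is now `starter` rather than `ollama`), the stack is constructed in-process rather than behind a server. The following is a rough sketch of that usage; the import path and method names follow the library-client pattern in the Llama Stack docs and should be treated as illustrative rather than authoritative.

```python
# Sketch of in-process usage with the starter config. Assumes the build step
# below has completed and any provider toggles (e.g. ENABLE_OLLAMA=ollama)
# are already set in the environment.
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")
client.initialize()  # instantiates the configured providers inside the current process

# The client then mirrors the HTTP client surface, e.g. listing registered models:
for model in client.models.list():
    print(model.identifier)
```

The `pip install` and `llama stack build` commands below are still required first, since the library client resolves the template name against the local installation.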
```bash # setup uv pip install llama-stack -llama stack build --template ollama --image-type venv +llama stack build --template starter --image-type venv ``` ```python diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md deleted file mode 100644 index d7aedbfb2..000000000 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ /dev/null @@ -1,79 +0,0 @@ - -# Bedrock Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-bedrock` distribution consists of the following provider configurations: - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::bedrock` | -| safety | `remote::bedrock` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) - -### Models - -The following models are available by default: - -- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` - - -### Prerequisite: API Keys - -Make sure you have access to a AWS Bedrock API Key. You can get one by visiting [AWS Bedrock](https://aws.amazon.com/bedrock/). - - -## Running Llama Stack with AWS Bedrock - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-bedrock \ - --port $LLAMA_STACK_PORT \ - --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ - --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION -``` - -### Via Conda - -```bash -llama stack build --template bedrock --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ - --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION -``` diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md deleted file mode 100644 index 3c4db1b75..000000000 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ /dev/null @@ -1,67 +0,0 @@ - -# Cerebras Distribution - -The `llamastack/distribution-cerebras` distribution consists of the following provider configurations. 
- -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::cerebras`, `inline::sentence-transformers` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `CEREBRAS_API_KEY`: Cerebras API Key (default: ``) - -### Models - -The following models are available by default: - -- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)` - - -### Prerequisite: API Keys - -Make sure you have access to a Cerebras API Key. You can get one by visiting [cloud.cerebras.ai](https://cloud.cerebras.ai/). - - -## Running Llama Stack with Cerebras - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-cerebras \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template cerebras --image-type conda -llama stack run ./run.yaml \ - --port 8321 \ - --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY -``` diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md deleted file mode 100644 index e09666e13..000000000 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -orphan: true ---- - -# Fireworks Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-fireworks` distribution consists of the following provider configurations. 
- -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| files | `inline::localfs` | -| inference | `remote::fireworks`, `inline::sentence-transformers` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::rag-runtime`, `remote::model-context-protocol` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `FIREWORKS_API_KEY`: Fireworks.AI API Key (default: ``) - -### Models - -The following models are available by default: - -- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)` -- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` -- `accounts/fireworks/models/llama4-scout-instruct-basic (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` -- `accounts/fireworks/models/llama4-maverick-instruct-basic (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` -- `nomic-ai/nomic-embed-text-v1.5 ` - - -### Prerequisite: API Keys - -Make sure you have access to a Fireworks API Key. You can get one by visiting [fireworks.ai](https://fireworks.ai/). - - -## Running Llama Stack with Fireworks - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-fireworks \ - --port $LLAMA_STACK_PORT \ - --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template fireworks --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY -``` diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md deleted file mode 100644 index 1b2194ad8..000000000 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -orphan: true ---- - -# Groq Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-groq` distribution consists of the following provider configurations. 
- -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::groq` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime` | -| vector_io | `inline::faiss` | - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `GROQ_API_KEY`: Groq API Key (default: ``) - -### Models - -The following models are available by default: - -- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `groq/llama-3.1-8b-instant ` -- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)` -- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` -- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` -- `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` -- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` - - -### Prerequisite: API Keys - -Make sure you have access to a Groq API Key. You can get one by visiting [Groq](https://api.groq.com/). - - -## Running Llama Stack with Groq - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-groq \ - --port $LLAMA_STACK_PORT \ - --env GROQ_API_KEY=$GROQ_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template groq --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env GROQ_API_KEY=$GROQ_API_KEY -``` diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md deleted file mode 100644 index 47e38f73d..000000000 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ /dev/null @@ -1,177 +0,0 @@ - -# NVIDIA Distribution - -The `llamastack/distribution-nvidia` distribution consists of the following provider configurations. 
- -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `inline::localfs`, `remote::nvidia` | -| eval | `remote::nvidia` | -| inference | `remote::nvidia` | -| post_training | `remote::nvidia` | -| safety | `remote::nvidia` | -| scoring | `inline::basic` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `inline::rag-runtime` | -| vector_io | `inline::faiss` | - - -### Environment Variables - -The following environment variables can be configured: - -- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) -- `NVIDIA_APPEND_API_VERSION`: Whether to append the API version to the base_url (default: `True`) -- `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) -- `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) -- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) -- `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) -- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) -- `NVIDIA_GUARDRAILS_CONFIG_ID`: NVIDIA Guardrail Configuration ID (default: `self-check`) -- `NVIDIA_EVALUATOR_URL`: URL for the NeMo Evaluator Service (default: `http://0.0.0.0:7331`) -- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) -- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) - -### Models - -The following models are available by default: - -- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)` -- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)` -- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` -- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `nvidia/llama-3.2-nv-embedqa-1b-v2 ` -- `nvidia/nv-embedqa-e5-v5 ` -- `nvidia/nv-embedqa-mistral-7b-v2 ` -- `snowflake/arctic-embed-l ` - - -## Prerequisites -### NVIDIA API Keys - -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. - -### Deploy NeMo Microservices Platform -The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. - -## Supported Services -Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. - -### Inference: NVIDIA NIM -NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: - 1. 
Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key) - 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. - -The deployed platform includes the NIM Proxy microservice, which is the service that provides to access your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment. - -### Datasetio API: NeMo Data Store -The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposts APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. - -See the {repopath}`NVIDIA Datasetio docs::llama_stack/providers/remote/datasetio/nvidia/README.md` for supported features and example usage. - -### Eval API: NeMo Evaluator -The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Eval docs::llama_stack/providers/remote/eval/nvidia/README.md` for supported features and example usage. - -### Post-Training API: NeMo Customizer -The NeMo Customizer microservice supports fine-tuning models. You can reference {repopath}`this list of supported models::llama_stack/providers/remote/post_training/nvidia/models.py` that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Post-Training docs::llama_stack/providers/remote/post_training/nvidia/README.md` for supported features and example usage. - -### Safety API: NeMo Guardrails -The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Safety docs::llama_stack/providers/remote/safety/nvidia/README.md` for supported features and example usage. - -## Deploying models -In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. - -Note: For improved inference speeds, we need to use NIM with `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. -```sh -# URL to NeMo NIM Proxy service -export NEMO_URL="http://nemo.test" - -curl --location "$NEMO_URL/v1/deployment/model-deployments" \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "name": "llama-3.2-1b-instruct", - "namespace": "meta", - "config": { - "model": "meta/llama-3.2-1b-instruct", - "nim_deployment": { - "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", - "image_tag": "1.8.3", - "pvc_size": "25Gi", - "gpu": 1, - "additional_envs": { - "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" - } - } - } - }' -``` -This NIM deployment should take approximately 10 minutes to go live. 
[See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. - -You can also remove a deployed NIM to free up GPU resources, if needed. -```sh -export NEMO_URL="http://nemo.test" - -curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" -``` - -## Running Llama Stack with NVIDIA - -You can do this via Conda or venv (build code), or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-nvidia \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY -``` - -### Via Conda - -```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct -llama stack build --template nvidia --image-type conda -llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL -``` - -### Via venv - -If you've set up your local development environment, you can also build the image using your local virtual environment. - -```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct -llama stack build --template nvidia --image-type venv -llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL -``` - -## Example Notebooks -For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in {repopath}`docs/notebooks/nvidia`. diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md deleted file mode 100644 index e09c79359..000000000 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -orphan: true ---- - -# Ollama Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-ollama` distribution consists of the following provider configurations. - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| files | `inline::localfs` | -| inference | `remote::ollama` | -| post_training | `inline::huggingface` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration. 
- -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `OLLAMA_URL`: URL of the Ollama server (default: `http://127.0.0.1:11434`) -- `INFERENCE_MODEL`: Inference model loaded into the Ollama server (default: `meta-llama/Llama-3.2-3B-Instruct`) -- `SAFETY_MODEL`: Safety model loaded into the Ollama server (default: `meta-llama/Llama-Guard-3-1B`) - - -## Setting up Ollama server - -Please check the [Ollama Documentation](https://github.com/ollama/ollama) on how to install and run Ollama. After installing Ollama, you need to run `ollama serve` to start the server. - -In order to load models, you can run: - -```bash -export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" - -# ollama names this model differently, and we must use the ollama name when loading the model -export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" -ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m -``` - -If you are using Llama Stack Safety / Shield APIs, you will also need to pull and run the safety model. - -```bash -export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B" - -# ollama names this model differently, and we must use the ollama name when loading the model -export OLLAMA_SAFETY_MODEL="llama-guard3:1b" -ollama run $OLLAMA_SAFETY_MODEL --keepalive 60m -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with Ollama as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -export LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-ollama \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-ollama \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. - -```bash -export LLAMA_STACK_PORT=8321 - -llama stack build --template ollama --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://localhost:11434 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=http://localhost:11434 -``` - - -### (Optional) Update Model Serving Configuration - -```{note} -Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py) for the supported Ollama models. 
-``` - -To serve a new model with `ollama` -```bash -ollama run -``` - -To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama. -``` -$ ollama ps -NAME ID SIZE PROCESSOR UNTIL -llama3.2:3b-instruct-fp16 195a8c01d91e 8.6 GB 100% GPU 9 minutes from now -``` - -To verify that the model served by ollama is correctly connected to Llama Stack server -```bash -$ llama-stack-client models list - -Available Models - -┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ -┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ -└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ - -Total models: 1 -``` diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md deleted file mode 100644 index 6e7cf410d..000000000 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ /dev/null @@ -1,297 +0,0 @@ ---- -orphan: true ---- - -# Remote vLLM Distribution -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-remote-vllm` distribution consists of the following provider configurations: - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::vllm`, `inline::sentence-transformers` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -You can use this distribution if you want to run an independent vLLM server for inference. - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `INFERENCE_MODEL`: Inference model loaded into the vLLM server (default: `meta-llama/Llama-3.2-3B-Instruct`) -- `VLLM_URL`: URL of the vLLM server with the main inference model (default: `http://host.docker.internal:5100/v1`) -- `MAX_TOKENS`: Maximum number of tokens for generation (default: `4096`) -- `SAFETY_VLLM_URL`: URL of the vLLM server with the safety model (default: `http://host.docker.internal:5101/v1`) -- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`) - - -## Setting up vLLM server - -In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM -server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also -[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and -that we only use GPUs here for demonstration purposes. 
Note that if you run into issues, you can include the environment variable `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) in the `docker run` command to enable log response from API server for debugging. - -### Setting up vLLM server on AMD GPU - -AMD provides two main vLLM container options: -- rocm/vllm: Production-ready container -- rocm/vllm-dev: Development container with the latest vLLM features - -Please check the [Blog about ROCm vLLM Usage](https://rocm.blogs.amd.com/software-tools-optimization/vllm-container/README.html) to get more details. - -Here is a sample script to start a ROCm vLLM server locally via Docker: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 -export VLLM_DIMG="rocm/vllm-dev:main" - -docker run \ - --pull always \ - --ipc=host \ - --privileged \ - --shm-size 16g \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --cap-add=SYS_PTRACE \ - --cap-add=CAP_SYS_ADMIN \ - --security-opt seccomp=unconfined \ - --security-opt apparmor=unconfined \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - $VLLM_DIMG \ - python -m vllm.entrypoints.openai.api_server \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 -export VLLM_DIMG="rocm/vllm-dev:main" - -docker run \ - --pull always \ - --ipc=host \ - --privileged \ - --shm-size 16g \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --cap-add=SYS_PTRACE \ - --cap-add=CAP_SYS_ADMIN \ - --security-opt seccomp=unconfined \ - --security-opt apparmor=unconfined \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ - -p $SAFETY_PORT:$SAFETY_PORT \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - $VLLM_DIMG \ - python -m vllm.entrypoints.openai.api_server \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -### Setting up vLLM server on NVIDIA GPU - -Please check the [vLLM Documentation](https://docs.vllm.ai/en/v0.5.5/serving/deploying_with_docker.html) to get a vLLM endpoint. Here is a sample script to start a vLLM server locally via Docker: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 - -docker run \ - --pull always \ - --runtime nvidia \ - --gpus $CUDA_VISIBLE_DEVICES \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --ipc=host \ - vllm/vllm-openai:latest \ - --gpu-memory-utilization 0.7 \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). 
- -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 - -docker run \ - --pull always \ - --runtime nvidia \ - --gpus $CUDA_VISIBLE_DEVICES \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --ipc=host \ - vllm/vllm-openai:latest \ - --gpu-memory-utilization 0.7 \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -### Setting up vLLM server on Intel GPU - -Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to vLLM side setup which guides towards installing vLLM from sources orself-building vLLM Docker container, Intel provides prebuilt vLLM container to use on systems with Intel GPUs supported by PyTorch XPU backend: -- [intel/vllm](https://hub.docker.com/r/intel/vllm) - -Here is a sample script to start a vLLM server locally via Docker using Intel provided container: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct -export ZE_AFFINITY_MASK=0 - -docker run \ - --pull always \ - --device /dev/dri \ - -v /dev/dri/by-path:/dev/dri/by-path \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --ipc=host \ - intel/vllm:xpu \ - --gpu-memory-utilization 0.7 \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export ZE_AFFINITY_MASK=1 - -docker run \ - --pull always \ - --device /dev/dri \ - -v /dev/dri/by-path:/dev/dri/by-path \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --ipc=host \ - intel/vllm:xpu \ - --gpu-memory-utilization 0.7 \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
- -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export LLAMA_STACK_PORT=8321 - -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \ - llamastack/distribution-remote-vllm \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B - -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-remote-vllm \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env SAFETY_VLLM_URL=http://host.docker.internal:$SAFETY_PORT/v1 -``` - - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export LLAMA_STACK_PORT=8321 - -cd distributions/remote-vllm -llama stack build --template remote-vllm --image-type conda - -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://localhost:$INFERENCE_PORT/v1 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B - -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://localhost:$INFERENCE_PORT/v1 \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env SAFETY_VLLM_URL=http://localhost:$SAFETY_PORT/v1 -``` diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md deleted file mode 100644 index bb4842362..000000000 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -orphan: true ---- - -# SambaNova Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-sambanova` distribution consists of the following provider configurations. 
- -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| inference | `remote::sambanova`, `inline::sentence-transformers` | -| safety | `remote::sambanova` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `SAMBANOVA_API_KEY`: SambaNova API Key (default: ``) - -### Models - -The following models are available by default: - -- `sambanova/Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `sambanova/Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `sambanova/Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` -- `sambanova/Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `sambanova/Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `sambanova/Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `sambanova/Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `sambanova/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)` -- `sambanova/Llama-4-Maverick-17B-128E-Instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)` -- `sambanova/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` - - -### Prerequisite: API Keys - -Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup). - - -## Running Llama Stack with SambaNova - -You can do this via Conda (build code) or Docker which has a pre-built image. - - -### Via Docker - -```bash -LLAMA_STACK_PORT=8321 -llama stack build --template sambanova --image-type container -docker run \ - -it \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - distribution-sambanova \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` - - -### Via Venv - -```bash -llama stack build --template sambanova --image-type venv -llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` - - -### Via Conda - -```bash -llama stack build --template sambanova --image-type conda -llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` diff --git a/docs/source/distributions/self_hosted_distro/starter.md b/docs/source/distributions/self_hosted_distro/starter.md new file mode 100644 index 000000000..730ccf165 --- /dev/null +++ b/docs/source/distributions/self_hosted_distro/starter.md @@ -0,0 +1,134 @@ +--- +orphan: true +--- + +# Starter Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-starter` distribution is a comprehensive, multi-provider distribution that includes most of the available inference providers in Llama Stack. It's designed to be a one-stop solution for developers who want to experiment with different AI providers without having to configure each one individually. 
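To make this concrete before diving into the provider list, here is a rough sketch of what "enable only what you need" looks like in practice, using the `ENABLE_<PROVIDER>` convention and the `llama stack run` invocation described in the sections below. The Ollama variables are the ones used elsewhere in this guide; `ENABLE_OPENAI` is assumed to follow the same naming convention and should be verified against the generated `run.yaml`.

```bash
# Enable a local Ollama provider (variable names as used elsewhere in this guide).
export ENABLE_OLLAMA=ollama
export OLLAMA_URL=http://localhost:11434
export OLLAMA_INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

# Optionally enable a hosted provider alongside it. ENABLE_OPENAI is assumed to
# follow the same ENABLE_<PROVIDER> pattern; check the exact name in run.yaml.
export ENABLE_OPENAI=openai
export OPENAI_API_KEY=sk-...

# Start the stack with the enabled providers (see "Running the Distribution" below).
llama stack run distributions/starter/run.yaml --port 8321
```

Everything else stays disabled, so the same distribution can be pointed at different backends purely through environment variables.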
+
+## Provider Composition
+
+The starter distribution consists of the following configurations:
+
+| API | Provider(s) |
+|-----|-------------|
+| agents | `inline::meta-reference` |
+| datasetio | `remote::huggingface`, `inline::localfs` |
+| eval | `inline::meta-reference` |
+| files | `inline::localfs` |
+| inference | `remote::openai`, `remote::fireworks`, `remote::together`, `remote::ollama`, `remote::anthropic`, `remote::gemini`, `remote::groq`, `remote::sambanova`, `remote::vllm`, `remote::tgi`, `remote::cerebras`, `remote::llama-openai-compat`, `remote::nvidia`, `remote::hf::serverless`, `remote::hf::endpoint`, `inline::sentence-transformers`, `remote::passthrough` |
+| safety | `inline::llama-guard` |
+| post_training | `inline::huggingface` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
+| telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `inline::sqlite-vec`, `remote::chromadb`, `remote::pgvector` |
+
+## Inference Providers
+
+The starter distribution includes a comprehensive set of inference providers:
+
+- **OpenAI**: GPT-4, GPT-3.5, O1, O3, O4 models and text embeddings - see the relevant provider configuration documentation for details
+- **Fireworks**: Llama 3.1, 3.2, 3.3, 4 Scout, 4 Maverick models and embeddings
+- **Together**: Llama 3.1, 3.2, 3.3, 4 Scout, 4 Maverick models and embeddings
+- **Anthropic**: Claude 3.5 Sonnet, Claude 3.7 Sonnet, Claude 3.5 Haiku, and Voyage embeddings
+- **Gemini**: Gemini 1.5, 2.0, 2.5 models and text embeddings
+- **Groq**: Fast Llama models (3.1, 3.2, 3.3, 4 Scout, 4 Maverick)
+- **SambaNova**: Llama 3.1, 3.2, 3.3, 4 Scout, 4 Maverick models
+- **Cerebras**: Cerebras AI models
+- **NVIDIA**: NVIDIA NIM models
+- **HuggingFace**: Serverless and endpoint models
+- **Bedrock**: AWS Bedrock models
+- **Passthrough**: Passthrough provider - use this to connect to any other inference provider that is not supported by Llama Stack
+- **Ollama**: Local Ollama models
+- **vLLM**: Remote vLLM server
+- **TGI**: Text Generation Inference server, including Dell Enterprise Hub's custom TGI container (use `DEH_URL`)
+- **Sentence Transformers**: Local embedding models
+
+All providers are **disabled** by default, so you need to enable them by setting environment variables. See [Enabling Providers](#enabling-providers) for more details.
+
+## Vector Providers
+
+The starter distribution includes a comprehensive set of vector providers:
+
+- **FAISS**: Local FAISS vector store - enabled by default
+- **SQLite**: Local SQLite vector store - disabled by default
+- **ChromaDB**: Remote ChromaDB server - disabled by default
+- **PGVector**: Remote PGVector server - disabled by default
+
+## Enabling Providers
+
+You can enable specific providers by setting the corresponding environment variable to a provider ID string.
+
+For instance, to enable the Ollama provider, you can set the `ENABLE_OLLAMA` environment variable to `ollama`.
+
+```bash
+export ENABLE_OLLAMA=ollama
+```
+
+To disable a provider, set its environment variable to `__disabled__`, for example `ENABLE_OLLAMA=__disabled__`.
+
+## Running the Distribution
+
+You can run the starter distribution via Docker or directly using the Llama Stack CLI.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+ +```bash +LLAMA_STACK_PORT=8321 +docker run \ + -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -e ENABLE_OLLAMA=ollama \ + -e OLLAMA_INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + llamastack/distribution-starter \ + --port $LLAMA_STACK_PORT +``` + +You can also use the `llama stack run` command to run the distribution. + +```bash +llama stack run distributions/starter/run.yaml \ + --port 8321 \ + --env ENABLE_OLLAMA=ollama \ + --env OLLAMA_INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct +``` + +## Storage + +The starter distribution uses SQLite for local storage of various components: + +- **Metadata store**: `~/.llama/distributions/starter/registry.db` +- **Inference store**: `~/.llama/distributions/starter/inference_store.db` +- **FAISS store**: `~/.llama/distributions/starter/faiss_store.db` +- **SQLite vector store**: `~/.llama/distributions/starter/sqlite_vec.db` +- **Files metadata**: `~/.llama/distributions/starter/files_metadata.db` +- **Agents store**: `~/.llama/distributions/starter/agents_store.db` +- **Responses store**: `~/.llama/distributions/starter/responses_store.db` +- **Trace store**: `~/.llama/distributions/starter/trace_store.db` +- **Evaluation store**: `~/.llama/distributions/starter/meta_reference_eval.db` +- **Dataset I/O stores**: Various HuggingFace and local filesystem stores + +## Benefits of the Starter Distribution + +1. **Comprehensive Coverage**: Includes most popular AI providers in one distribution +2. **Flexible Configuration**: Easy to enable/disable providers based on your needs +3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware +4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed +5. **Production Ready**: Includes safety, evaluation, and telemetry components +6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools + +The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends. diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md deleted file mode 100644 index 24f9d03ec..000000000 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ /dev/null @@ -1,149 +0,0 @@ ---- -orphan: true ---- - - -# TGI Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-tgi` distribution consists of the following provider configurations. - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::tgi`, `inline::sentence-transformers` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference. 
- -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`) -- `TGI_URL`: URL of the TGI server with the main inference model (default: `http://127.0.0.1:8080/v1`) -- `TGI_SAFETY_URL`: URL of the TGI server with the safety model (default: `http://127.0.0.1:8081/v1`) -- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`) - - -## Setting up TGI server - -Please check the [TGI Getting Started Guide](https://github.com/huggingface/text-generation-inference?tab=readme-ov-file#get-started) to get a TGI endpoint. Here is a sample script to start a TGI server locally via Docker: - -```bash -export INFERENCE_PORT=8080 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 - -docker run --rm -it \ - --pull always \ - -v $HOME/.cache/huggingface:/data \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference:2.3.1 \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --cuda-memory-fraction 0.7 \ - --model-id $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a TGI with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 - -docker run --rm -it \ - --pull always \ - -v $HOME/.cache/huggingface:/data \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference:2.3.1 \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --model-id $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-tgi \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-tgi \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env TGI_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. 
- -```bash -llama stack build --template tgi --image-type conda -llama stack run ./run.yaml - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://127.0.0.1:$INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://127.0.0.1:$INFERENCE_PORT \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env TGI_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT -``` diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md deleted file mode 100644 index adfc2c472..000000000 --- a/docs/source/distributions/self_hosted_distro/together.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -orphan: true ---- - -# Together Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-together` distribution consists of the following provider configurations. - -| API | Provider(s) | -|-----|-------------| -| agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | -| eval | `inline::meta-reference` | -| inference | `remote::together`, `inline::sentence-transformers` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | -| telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` | -| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | - - -### Environment Variables - -The following environment variables can be configured: - -- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) -- `TOGETHER_API_KEY`: Together.AI API Key (default: ``) - -### Models - -The following models are available by default: - -- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)` -- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)` -- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` -- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)` -- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` -- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)` -- `togethercomputer/m2-bert-80M-8k-retrieval ` -- `togethercomputer/m2-bert-80M-32k-retrieval ` -- `meta-llama/Llama-4-Scout-17B-16E-Instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct, together/meta-llama/Llama-4-Scout-17B-16E-Instruct)` -- `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct, together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8)` - - -### Prerequisite: API Keys - -Make sure you have access to a Together API Key. You can get one by visiting [together.xyz](https://together.xyz/). - - -## Running Llama Stack with Together - -You can do this via Conda (build code) or Docker which has a pre-built image. 
- -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-together \ - --port $LLAMA_STACK_PORT \ - --env TOGETHER_API_KEY=$TOGETHER_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template together --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env TOGETHER_API_KEY=$TOGETHER_API_KEY -``` diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md index e40a4903a..d80ec3554 100644 --- a/docs/source/getting_started/detailed_tutorial.md +++ b/docs/source/getting_started/detailed_tutorial.md @@ -58,7 +58,7 @@ which defines the providers and their settings. Now let's build and run the Llama Stack config for Ollama. ```bash -INFERENCE_MODEL=llama3.2:3b llama stack build --template ollama --image-type venv --run +INFERENCE_MODEL=llama3.2:3b llama stack build --template starter --image-type venv --run ``` ::: :::{tab-item} Using `conda` @@ -69,7 +69,7 @@ which defines the providers and their settings. Now let's build and run the Llama Stack config for Ollama. ```bash -INFERENCE_MODEL=llama3.2:3b llama stack build --template ollama --image-type conda --image-name llama3-3b-conda --run +INFERENCE_MODEL=llama3.2:3b llama stack build --template starter --image-type conda --image-name llama3-3b-conda --run ``` ::: :::{tab-item} Using a Container diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ee7cdd4a9..418a30eb3 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -17,7 +17,7 @@ ollama run llama3.2:3b --keepalive 60m #### Step 2: Run the Llama Stack server We will use `uv` to run the Llama Stack server. ```bash -INFERENCE_MODEL=llama3.2:3b uv run --with llama-stack llama stack build --template ollama --image-type venv --run +INFERENCE_MODEL=llama3.2:3b uv run --with llama-stack llama stack build --template starter --image-type venv --run ``` #### Step 3: Run the demo Now open up a new terminal and copy the following script into a file named `demo_script.py`. diff --git a/docs/source/providers/post_training/huggingface.md b/docs/source/providers/post_training/huggingface.md index c342203a8..c7896aaf4 100644 --- a/docs/source/providers/post_training/huggingface.md +++ b/docs/source/providers/post_training/huggingface.md @@ -23,7 +23,7 @@ To use the HF SFTTrainer in your Llama Stack project, follow these steps: You can access the HuggingFace trainer via the `ollama` distribution: ```bash -llama stack build --template ollama --image-type venv +llama stack build --template starter --image-type venv llama stack run --image-type venv ~/.llama/distributions/ollama/ollama-run.yaml ``` diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 96f9768de..19c171de2 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -83,7 +83,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next 1. 
**Build the Llama Stack**: Build the Llama Stack using the `ollama` template: ```bash - llama stack build --template ollama --image-type conda + llama stack build --template starter --image-type conda ``` **Expected Output:** ```bash diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py index 1d9c1f4e9..7095ffd18 100644 --- a/llama_stack/distribution/providers.py +++ b/llama_stack/distribution/providers.py @@ -84,7 +84,13 @@ class ProviderImpl(Providers): Each API maps to a dictionary of provider IDs to their health responses. """ providers_health: dict[str, dict[str, HealthResponse]] = {} - timeout = 1.0 + + # The timeout has to be long enough to allow all the providers to be checked, especially in + # the case of the inference router health check since it checks all registered inference + # providers. + # The timeout must not be equal to the one set by health method for a given implementation, + # otherwise we will miss some providers. + timeout = 3.0 async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None: # Skip special implementations (inspect/providers) that don't have provider specs diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py index c86880669..3b82afd05 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/distribution/stack.py @@ -98,6 +98,10 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): method = getattr(impls[api], register_method) for obj in objects: + # Do not register models on disabled providers + if hasattr(obj, "provider_id") and obj.provider_id is not None and obj.provider_id == "__disabled__": + logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled provider.") + continue # In complex templates, like our starter template, we may have dynamic model ids # given by environment variables. This allows those environment variables to have # a default value of __disabled__ to skip registration of the model if not set. @@ -106,6 +110,7 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]): and obj.provider_model_id is not None and "__disabled__" in obj.provider_model_id ): + logger.debug(f"Skipping {rsrc.capitalize()} registration for disabled model.") continue # we want to maintain the type information in arguments to method. 
# instead of method(**obj.model_dump()), which may convert a typed attr to a dict, diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py index 81312ec76..5ad7376fc 100644 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ b/llama_stack/providers/remote/inference/cerebras/config.py @@ -26,8 +26,8 @@ class CerebrasImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY}", **kwargs) -> dict[str, Any]: return { "base_url": DEFAULT_BASE_URL, - "api_key": "${env.CEREBRAS_API_KEY}", + "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/llama_stack/providers/remote/inference/llama_openai_compat/config.py index 57bc7240d..339d241ca 100644 --- a/llama_stack/providers/remote/inference/llama_openai_compat/config.py +++ b/llama_stack/providers/remote/inference/llama_openai_compat/config.py @@ -31,7 +31,7 @@ class LlamaCompatConfig(BaseModel): ) @classmethod - def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: + def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY:}", **kwargs) -> dict[str, Any]: return { "openai_compat_api_base": "https://api.llama.com/compat/v1/", "api_key": api_key, diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/llama_stack/providers/remote/inference/nvidia/config.py index 6369928bb..c7f6f4ba6 100644 --- a/llama_stack/providers/remote/inference/nvidia/config.py +++ b/llama_stack/providers/remote/inference/nvidia/config.py @@ -53,9 +53,15 @@ class NVIDIAConfig(BaseModel): ) @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: + def sample_run_config( + cls, + url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", + api_key: str = "${env.NVIDIA_API_KEY:+}", + append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}", + **kwargs, + ) -> dict[str, Any]: return { - "url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", - "api_key": "${env.NVIDIA_API_KEY:+}", - "append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}", + "url": url, + "api_key": api_key, + "append_api_version": append_api_version, } diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index b2cc4d8a7..0145810a8 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -13,13 +13,9 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434" class OllamaImplConfig(BaseModel): url: str = DEFAULT_OLLAMA_URL - raise_on_connect_error: bool = True @classmethod - def sample_run_config( - cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs - ) -> dict[str, Any]: + def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: return { "url": url, - "raise_on_connect_error": raise_on_connect_error, } diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index e9df0dcc8..10d11a617 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -91,7 +91,6 @@ class OllamaInferenceAdapter( def __init__(self, config: OllamaImplConfig) -> None: self.register_helper = 
ModelRegistryHelper(MODEL_ENTRIES) self.url = config.url - self.raise_on_connect_error = config.raise_on_connect_error @property def client(self) -> AsyncClient: @@ -105,10 +104,7 @@ class OllamaInferenceAdapter( logger.debug(f"checking connectivity to Ollama at `{self.url}`...") health_response = await self.health() if health_response["status"] == HealthStatus.ERROR: - if self.raise_on_connect_error: - raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") - else: - logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal") + raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal") async def health(self) -> HealthResponse: """ diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/llama_stack/providers/remote/inference/passthrough/config.py index ce41495ce..647b2db46 100644 --- a/llama_stack/providers/remote/inference/passthrough/config.py +++ b/llama_stack/providers/remote/inference/passthrough/config.py @@ -24,8 +24,10 @@ class PassthroughImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, **kwargs) -> dict[str, Any]: + def sample_run_config( + cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs + ) -> dict[str, Any]: return { - "url": "${env.PASSTHROUGH_URL}", - "api_key": "${env.PASSTHROUGH_API_KEY}", + "url": url, + "api_key": api_key, } diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py index 3d632c9d8..d4448871f 100644 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ b/llama_stack/providers/remote/inference/tgi/config.py @@ -17,7 +17,11 @@ class TGIImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs): + def sample_run_config( + cls, + url: str = "${env.TGI_URL}", + **kwargs, + ): return { "url": url, } diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py index 292d74ef8..031200d4a 100644 --- a/llama_stack/providers/remote/inference/tgi/tgi.py +++ b/llama_stack/providers/remote/inference/tgi/tgi.py @@ -327,7 +327,6 @@ class InferenceEndpointAdapter(_HfAdapter): # Get the inference endpoint details api = HfApi(token=config.api_token.get_secret_value()) endpoint = api.get_inference_endpoint(config.endpoint_name) - # Wait for the endpoint to be ready (if not already) endpoint.wait(timeout=60) diff --git a/llama_stack/templates/bedrock/__init__.py b/llama_stack/templates/bedrock/__init__.py deleted file mode 100644 index 4e7965550..000000000 --- a/llama_stack/templates/bedrock/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .bedrock import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py deleted file mode 100644 index bc3a9304f..000000000 --- a/llama_stack/templates/bedrock/bedrock.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from pathlib import Path - -from llama_stack.distribution.datatypes import Provider, ToolGroupInput -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::bedrock"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["remote::bedrock"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "bedrock" - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - available_models = { - "bedrock": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use AWS Bedrock for running LLM inference and safety", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "vector_io": [vector_io_provider], - }, - default_models=default_models, - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - }, - ) diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml deleted file mode 100644 index 1a2c883fa..000000000 --- a/llama_stack/templates/bedrock/build.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: 2 -distribution_spec: - description: Use AWS Bedrock for running LLM inference and safety - providers: - inference: - - remote::bedrock - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - remote::bedrock - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md deleted file mode 100644 index e93bb92f2..000000000 --- a/llama_stack/templates/bedrock/doc_template.md +++ /dev/null @@ -1,73 +0,0 @@ -# Bedrock Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations: - -{{ providers_table }} - - -{% if run_config_env_vars 
%} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a AWS Bedrock API Key. You can get one by visiting [AWS Bedrock](https://aws.amazon.com/bedrock/). - - -## Running Llama Stack with AWS Bedrock - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ - --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION -``` - -### Via Conda - -```bash -llama stack build --template {{ name }} --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ - --env AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ - --env AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \ - --env AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION -``` diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml deleted file mode 100644 index 61bc83f02..000000000 --- a/llama_stack/templates/bedrock/run.yaml +++ /dev/null @@ -1,147 +0,0 @@ -version: 2 -image_name: bedrock -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: bedrock - provider_type: remote::bedrock - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db - safety: - - provider_id: bedrock - provider_type: remote::bedrock - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db -models: -- metadata: {} - model_id: meta.llama3-1-8b-instruct-v1:0 - provider_id: bedrock - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: bedrock - provider_model_id: meta.llama3-1-8b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: meta.llama3-1-70b-instruct-v1:0 - provider_id: bedrock - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: bedrock - provider_model_id: meta.llama3-1-70b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: meta.llama3-1-405b-instruct-v1:0 - provider_id: bedrock - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: bedrock - provider_model_id: meta.llama3-1-405b-instruct-v1:0 - model_type: llm -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/cerebras/__init__.py b/llama_stack/templates/cerebras/__init__.py deleted file mode 100644 index 9f9929b52..000000000 --- a/llama_stack/templates/cerebras/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .cerebras import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml deleted file mode 100644 index ecd0ac418..000000000 --- a/llama_stack/templates/cerebras/build.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: 2 -distribution_spec: - description: Use Cerebras for running LLM inference - providers: - inference: - - remote::cerebras - - inline::sentence-transformers - safety: - - inline::llama-guard - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - agents: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - telemetry: - - inline::meta-reference - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py deleted file mode 100644 index f341a88c1..000000000 --- a/llama_stack/templates/cerebras/cerebras.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig -from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::cerebras", "inline::sentence-transformers"], - "safety": ["inline::llama-guard"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "agents": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "telemetry": ["inline::meta-reference"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - ], - } - - name = "cerebras" - inference_provider = Provider( - provider_id="cerebras", - provider_type="remote::cerebras", - config=CerebrasImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - available_models = { - "cerebras": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - default_tool_groups = 
[ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name="cerebras", - distro_type="self_hosted", - description="Use Cerebras for running LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=default_models + [embedding_model], - default_shields=[], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "CEREBRAS_API_KEY": ( - "", - "Cerebras API Key", - ), - }, - ) diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md deleted file mode 100644 index 5cae2b2da..000000000 --- a/llama_stack/templates/cerebras/doc_template.md +++ /dev/null @@ -1,61 +0,0 @@ -# Cerebras Distribution - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a Cerebras API Key. You can get one by visiting [cloud.cerebras.ai](https://cloud.cerebras.ai/). - - -## Running Llama Stack with Cerebras - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
- -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template cerebras --image-type conda -llama stack run ./run.yaml \ - --port 8321 \ - --env CEREBRAS_API_KEY=$CEREBRAS_API_KEY -``` diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml deleted file mode 100644 index 9bd8fcc7c..000000000 --- a/llama_stack/templates/cerebras/run.yaml +++ /dev/null @@ -1,145 +0,0 @@ -version: 2 -image_name: cerebras -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: cerebras - provider_type: remote::cerebras - config: - base_url: https://api.cerebras.ai - api_key: ${env.CEREBRAS_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db -inference_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db -models: -- metadata: {} - model_id: llama3.1-8b - provider_id: cerebras - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: cerebras - provider_model_id: llama3.1-8b - model_type: llm -- metadata: {} - model_id: llama-3.3-70b - provider_id: cerebras - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: cerebras - provider_model_id: llama-3.3-70b - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/ci-tests/__init__.py b/llama_stack/templates/ci-tests/__init__.py deleted file mode 100644 index b309587f5..000000000 --- a/llama_stack/templates/ci-tests/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .ci_tests import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml deleted file mode 100644 index c061d0793..000000000 --- a/llama_stack/templates/ci-tests/build.yaml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -distribution_spec: - description: Distribution for running e2e tests in CI - providers: - inference: - - remote::fireworks - - inline::sentence-transformers - vector_io: - - inline::sqlite-vec - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py deleted file mode 100644 index 7de8069ae..000000000 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( - SQLiteVectorIOConfig, -) -from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig -from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::fireworks", "inline::sentence-transformers"], - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "ci-tests" - inference_provider = Provider( - provider_id="fireworks", - provider_type="remote::fireworks", - config=FireworksImplConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - available_models = { - "fireworks": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Distribution for running e2e tests in CI", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "FIREWORKS_API_KEY": ( - "", - "Fireworks API Key", - ), - }, - ) diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml deleted file mode 100644 index 4b7de1c0c..000000000 --- a/llama_stack/templates/ci-tests/run.yaml +++ /dev/null @@ -1,243 +0,0 @@ -version: 2 -image_name: ci-tests -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io 
-providers: - inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db -models: -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - 
model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks - 
provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/dell/__init__.py b/llama_stack/templates/dell/__init__.py deleted file mode 100644 index 143add56e..000000000 --- a/llama_stack/templates/dell/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .dell import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml deleted file mode 100644 index ff8d58a08..000000000 --- a/llama_stack/templates/dell/build.yaml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -distribution_spec: - description: Dell's distribution of Llama Stack. TGI inference via Dell's custom - container - providers: - inference: - - remote::tgi - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py deleted file mode 100644 index 5a6f52a89..000000000 --- a/llama_stack/templates/dell/dell.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::tgi", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - ], - } - name = "dell" - inference_provider = Provider( - provider_id="tgi0", - provider_type="remote::tgi", - config={ - "url": "${env.DEH_URL}", - }, - ) - safety_inference_provider = Provider( - provider_id="tgi1", - provider_type="remote::tgi", - config={ - "url": "${env.DEH_SAFETY_URL}", - }, - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - chromadb_provider = Provider( - provider_id="chromadb", - provider_type="remote::chromadb", - config={ - "url": "${env.CHROMA_URL}", - }, - ) - - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="tgi0", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="tgi1", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="brave-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Dell's distribution of Llama Stack. 
TGI inference via Dell's custom container", - container_image=None, - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [chromadb_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - safety_inference_provider, - embedding_provider, - ], - "vector_io": [chromadb_provider], - }, - default_models=[inference_model, safety_model, embedding_model], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "DEH_URL": ( - "http://0.0.0.0:8181", - "URL for the Dell inference server", - ), - "DEH_SAFETY_URL": ( - "http://0.0.0.0:8282", - "URL for the Dell safety inference server", - ), - "CHROMA_URL": ( - "http://localhost:6601", - "URL for the Chroma server", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model loaded into the TGI server", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Name of the safety (Llama-Guard) model to use", - ), - }, - ) diff --git a/llama_stack/templates/dell/doc_template.md b/llama_stack/templates/dell/doc_template.md deleted file mode 100644 index 6bdd7f81c..000000000 --- a/llama_stack/templates/dell/doc_template.md +++ /dev/null @@ -1,178 +0,0 @@ ---- -orphan: true ---- - -# Dell Distribution of Llama Stack - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference. - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - - -## Setting up Inference server using Dell Enterprise Hub's custom TGI container. - -NOTE: This is a placeholder to run inference with TGI. This will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified. 
- -```bash -export INFERENCE_PORT=8181 -export DEH_URL=http://0.0.0.0:$INFERENCE_PORT -export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct -export CHROMADB_HOST=localhost -export CHROMADB_PORT=6601 -export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT -export CUDA_VISIBLE_DEVICES=0 -export LLAMA_STACK_PORT=8321 - -docker run --rm -it \ - --pull always \ - --network host \ - -v $HOME/.cache/huggingface:/data \ - -e HF_TOKEN=$HF_TOKEN \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --cuda-memory-fraction 0.7 \ - --model-id $INFERENCE_MODEL \ - --port $INFERENCE_PORT --hostname 0.0.0.0 -``` - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a TGI with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_INFERENCE_PORT=8282 -export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 - -docker run --rm -it \ - --pull always \ - --network host \ - -v $HOME/.cache/huggingface:/data \ - -e HF_TOKEN=$HF_TOKEN \ - -p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --cuda-memory-fraction 0.7 \ - --model-id $SAFETY_MODEL \ - --hostname 0.0.0.0 \ - --port $SAFETY_INFERENCE_PORT -``` - -## Dell distribution relies on ChromaDB for vector database usage - -You can start a chroma-db easily using docker. -```bash -# This is where the indices are persisted -mkdir -p $HOME/chromadb - -podman run --rm -it \ - --network host \ - --name chromadb \ - -v $HOME/chromadb:/chroma/chroma \ - -e IS_PERSISTENT=TRUE \ - chromadb/chroma:latest \ - --port $CHROMADB_PORT \ - --host $CHROMADB_HOST -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
- -```bash -docker run -it \ - --pull always \ - --network host \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v $HOME/.llama:/root/.llama \ - # NOTE: mount the llama-stack directory if testing local changes else not needed - -v /home/hjshah/git/llama-stack:/app/llama-stack-source \ - # localhost/distribution-dell:dev if building / testing locally - llamastack/distribution-{{ name }}\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL - -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -export SAFETY_INFERENCE_PORT=8282 -export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B - -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v $HOME/.llama:/root/.llama \ - -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL -``` - -### Via Conda - -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. - -```bash -llama stack build --template {{ name }} --image-type conda -llama stack run {{ name }} - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL -``` diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml deleted file mode 100644 index 7f1d0a8c0..000000000 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ /dev/null @@ -1,134 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: tgi1 - provider_type: remote::tgi - config: - url: ${env.DEH_SAFETY_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMA_URL} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: tgi1 - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: brave-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml deleted file mode 100644 index 310f3cc20..000000000 --- a/llama_stack/templates/dell/run.yaml +++ /dev/null @@ -1,125 +0,0 @@ -version: 2 -image_name: dell -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi0 - provider_type: remote::tgi - config: - url: ${env.DEH_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: chromadb - provider_type: remote::chromadb - config: - url: ${env.CHROMA_URL} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi0 - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: brave-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/experimental-post-training/build.yaml b/llama_stack/templates/experimental-post-training/build.yaml deleted file mode 100644 index 55cd189c6..000000000 --- a/llama_stack/templates/experimental-post-training/build.yaml +++ /dev/null @@ -1,30 +0,0 @@ -version: '2' -name: experimental-post-training -distribution_spec: - description: Experimental template for post training - container_image: null - providers: - inference: - - inline::meta-reference - - remote::ollama - eval: - - inline::meta-reference - scoring: - - inline::basic - - inline::braintrust - post_training: - - inline::huggingface - datasetio: - - inline::localfs - - remote::huggingface - telemetry: - - inline::meta-reference - agents: - - inline::meta-reference - safety: - - inline::llama-guard - vector_io: - - inline::faiss - tool_runtime: - - remote::brave-search -image_type: conda diff --git a/llama_stack/templates/experimental-post-training/run.yaml b/llama_stack/templates/experimental-post-training/run.yaml deleted file mode 100644 index a74aa3647..000000000 --- a/llama_stack/templates/experimental-post-training/run.yaml +++ /dev/null @@ -1,107 +0,0 @@ -version: '2' -image_name: experimental-post-training -container_image: null -conda_env: experimental-post-training -apis: -- agents -- datasetio -- eval -- inference -- vector_io -- safety -- scoring -- telemetry -- post_training -- tool_runtime -providers: - inference: - - provider_id: meta-reference-inference - provider_type: 
inline::meta-reference - config: - max_seq_len: 4096 - checkpoint_dir: null - create_distributed_process_group: False - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - datasetio: - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/localfs_datasetio.db - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/huggingface}/huggingface_datasetio.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} - post_training: - - provider_id: huggingface - provider_type: inline::huggingface - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/agents_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/faiss_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/experimental-post-training}/registry.db -models: [] -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] diff --git a/llama_stack/templates/fireworks/__init__.py b/llama_stack/templates/fireworks/__init__.py deleted file mode 100644 index 1d85c66db..000000000 --- a/llama_stack/templates/fireworks/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
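The Python-defined templates removed in this PR (dell, fireworks, groq, hf-endpoint) each expose a `get_distribution_template()` that returns a `DistributionTemplate` describing the providers per API and the run-config YAML files to generate. A minimal sketch of inspecting one of these modules as they existed before removal follows; the attribute names are assumed to mirror the constructor keywords visible in the deleted code and are not verified API.

```python
# Sketch only: inspect one of the template modules deleted in this PR.
# Assumes DistributionTemplate/RunConfigSettings expose their constructor
# keywords (name, providers, run_configs, provider_overrides) as attributes.
from llama_stack.templates.fireworks import get_distribution_template

template = get_distribution_template()
print(template.name)                    # "fireworks"
print(sorted(template.providers))       # APIs covered: agents, inference, safety, ...
for filename, settings in template.run_configs.items():
    # e.g. "run.yaml" and "run-with-safety.yaml" for the fireworks template
    print(filename, sorted(settings.provider_overrides))
```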
- -from .fireworks import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml deleted file mode 100644 index eb08c1d43..000000000 --- a/llama_stack/templates/fireworks/build.yaml +++ /dev/null @@ -1,38 +0,0 @@ -version: 2 -distribution_spec: - description: Use Fireworks.AI for running LLM inference - providers: - inference: - - remote::fireworks - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - files: - - inline::localfs - tool_runtime: - - remote::brave-search - - remote::tavily-search - - remote::wolfram-alpha - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md deleted file mode 100644 index ba0205db0..000000000 --- a/llama_stack/templates/fireworks/doc_template.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -orphan: true ---- -# Fireworks Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a Fireworks API Key. You can get one by visiting [fireworks.ai](https://fireworks.ai/). - - -## Running Llama Stack with Fireworks - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template fireworks --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY -``` diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py deleted file mode 100644 index ad29c648f..000000000 --- a/llama_stack/templates/fireworks/fireworks.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
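The `doc_template.md` files deleted in this PR use `{{ name }}`, `{{ providers_table }}`, and `{% for var, (default_value, description) in run_config_env_vars.items() %}` placeholders. The docs generator itself is not part of this diff; the snippet below only illustrates that these placeholders behave like ordinary Jinja2, using the Dell template's environment-variable table from earlier in the diff as sample input.

```python
# Hypothetical rendering of the placeholder syntax used by the deleted
# doc_template.md files; llama-stack's real docs generator is not shown here.
from jinja2 import Template

doc = Template(
    "# {{ name }} Distribution of Llama Stack\n"
    "{% for var, (default_value, description) in run_config_env_vars.items() %}"
    "- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)\n"
    "{% endfor %}"
)

print(
    doc.render(
        name="dell",
        run_config_env_vars={
            "DEH_URL": ("http://0.0.0.0:8181", "URL for the Dell inference server"),
            "CHROMA_URL": ("http://localhost:6601", "URL for the Chroma server"),
        },
    )
)
```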
- -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig -from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::fireworks", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "files": ["inline::localfs"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "remote::wolfram-alpha", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - - name = "fireworks" - - inference_provider = Provider( - provider_id="fireworks", - provider_type="remote::fireworks", - config=FireworksImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - files_provider = Provider( - provider_id="meta-reference-files", - provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - available_models = { - "fireworks": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use Fireworks.AI for running LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - "files": [files_provider], - }, - default_models=default_models + [embedding_model], - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - 
provider_overrides={ - "inference": [ - inference_provider, - embedding_provider, - ], - "vector_io": [vector_io_provider], - "files": [files_provider], - "safety": [ - Provider( - provider_id="llama-guard", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="llama-guard-vision", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="code-scanner", - provider_type="inline::code-scanner", - config={}, - ), - ], - }, - default_models=[ - *default_models, - embedding_model, - ], - default_shields=[ - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-8B", - provider_id="llama-guard", - ), - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-11B-Vision", - provider_id="llama-guard-vision", - ), - ShieldInput( - shield_id="CodeScanner", - provider_id="code-scanner", - ), - ], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "FIREWORKS_API_KEY": ( - "", - "Fireworks.AI API Key", - ), - }, - ) diff --git a/llama_stack/templates/fireworks/remote-hosted-report.md b/llama_stack/templates/fireworks/remote-hosted-report.md deleted file mode 100644 index 2f3c882b7..000000000 --- a/llama_stack/templates/fireworks/remote-hosted-report.md +++ /dev/null @@ -1,45 +0,0 @@ -# Report for fireworks distribution - -## Supported Models -| Model Descriptor | fireworks | -|:---|:---| -| meta-llama/Llama-3-8B-Instruct | ❌ | -| meta-llama/Llama-3-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-8B-Instruct | ❌ | -| meta-llama/Llama-3.1-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-405B-Instruct-FP8 | ❌ | -| meta-llama/Llama-3.2-1B-Instruct | ❌ | -| meta-llama/Llama-3.2-3B-Instruct | ❌ | -| meta-llama/Llama-3.2-11B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.2-90B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.3-70B-Instruct | ❌ | -| meta-llama/Llama-Guard-3-11B-Vision | ❌ | -| meta-llama/Llama-Guard-3-1B | ❌ | -| meta-llama/Llama-Guard-3-8B | ❌ | -| meta-llama/Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ❌ | -| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ❌ | -| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ❌ | -| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ❌ | -| Text | /completion | streaming | test_text_completion_streaming | ❌ | -| Text | /completion | non_streaming | test_text_completion_non_streaming | ❌ | -| Text | /completion | structured_output | test_text_completion_structured_output | ❌ | - -## Memory: -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ | -| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| create_agent_turn | rag | test_rag_agent | ❌ | -| create_agent_turn | custom_tool | test_custom_tool | ❌ | -| create_agent_turn | code_execution | test_code_execution | ❌ | diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml 
b/llama_stack/templates/fireworks/run-with-safety.yaml deleted file mode 100644 index 6265f5cae..000000000 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ /dev/null @@ -1,271 +0,0 @@ -version: 2 -image_name: fireworks -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - - provider_id: llama-guard-vision - provider_type: inline::llama-guard - config: {} - - provider_id: code-scanner - provider_type: inline::code-scanner - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} 
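A recurring detail in the generated run.yaml files in this diff is the `${env.VAR}`, `${env.VAR:=default}`, and `${env.VAR:+}` placeholder syntax. The stack's own substitution code is not shown here, so the sketch below resolves these forms under assumed, shell-like semantics (`:=` supplies a default when the variable is unset; `:+` is read as "use the variable's value when set, otherwise leave the field empty") purely to make the notation concrete.

```python
import os
import re

# Hypothetical resolver for the placeholder forms used throughout the YAML
# above. This is NOT llama-stack's implementation; the ":=" and ":+" semantics
# are assumptions made for illustration only.
_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z0-9_]+)(?::([=+])([^}]*))?\}")

def resolve(value: str) -> str:
    def _sub(m: re.Match) -> str:
        name, op, default = m.group(1), m.group(2), m.group(3) or ""
        found = os.environ.get(name)
        if found is not None:
            return found                  # set in the environment: use it
        if op == "=":
            return default                # ${env.VAR:=default} -> default when unset
        if op == "+":
            return ""                     # ${env.VAR:+} -> empty when unset
        return m.group(0)                 # bare ${env.VAR}: left untouched when unset

    return _PLACEHOLDER.sub(_sub, value)

print(resolve("${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db"))
# -> "~/.llama/distributions/fireworks/registry.db" unless SQLITE_STORE_DIR is set
```

Judging from the configs above, `:=` is used where a sensible default exists (ports, SQLite paths, telemetry sinks), while API keys use `:+` so they remain unset unless explicitly provided.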
-metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db -models: -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks - provider_model_id: 
accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B - provider_id: llama-guard -- shield_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: llama-guard-vision -- shield_id: CodeScanner - provider_id: code-scanner -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml deleted file mode 100644 index e10404e92..000000000 --- a/llama_stack/templates/fireworks/run.yaml +++ /dev/null @@ -1,261 +0,0 @@ -version: 2 -image_name: fireworks -apis: -- agents -- datasetio -- eval -- files -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db - 
datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db -models: -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: 
meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-8b - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks - provider_model_id: nomic-ai/nomic-embed-text-v1.5 - model_type: embedding -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/groq/__init__.py b/llama_stack/templates/groq/__init__.py deleted file mode 100644 index 02a39601d..000000000 --- a/llama_stack/templates/groq/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
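Note the registration pattern in the model lists above: each Fireworks model appears twice, once under its provider-native ID and once under the corresponding `meta-llama/...` alias, with both entries pointing at the same `provider_model_id` (the groq run.yaml further below follows the same pattern). In the template code this expansion appears to come from `get_model_registry()`, whose internals are not part of this diff; the sketch below just restates the resulting alias relationship as plain data.

```python
# Two entries from the fireworks run.yaml above, reduced to plain dictionaries.
entries = [
    {"model_id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
     "provider_model_id": "accounts/fireworks/models/llama-v3p1-8b-instruct"},
    {"model_id": "meta-llama/Llama-3.1-8B-Instruct",
     "provider_model_id": "accounts/fireworks/models/llama-v3p1-8b-instruct"},
]

# Group the registered model_ids by the provider model they ultimately map to.
aliases: dict[str, list[str]] = {}
for entry in entries:
    aliases.setdefault(entry["provider_model_id"], []).append(entry["model_id"])

print(aliases)
# {'accounts/fireworks/models/llama-v3p1-8b-instruct':
#   ['accounts/fireworks/models/llama-v3p1-8b-instruct',
#    'meta-llama/Llama-3.1-8B-Instruct']}
```

Clients can therefore refer to a model by either identifier, since both registrations resolve to the same underlying provider model.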
- -from .groq import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml deleted file mode 100644 index 7e50a899f..000000000 --- a/llama_stack/templates/groq/build.yaml +++ /dev/null @@ -1,31 +0,0 @@ -version: 2 -distribution_spec: - description: Use Groq for running LLM inference - providers: - inference: - - remote::groq - vector_io: - - inline::faiss - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md deleted file mode 100644 index 80945ff9c..000000000 --- a/llama_stack/templates/groq/doc_template.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -orphan: true ---- -# Groq Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a Groq API Key. You can get one by visiting [Groq](https://api.groq.com/). - - -## Running Llama Stack with Groq - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env GROQ_API_KEY=$GROQ_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template groq --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env GROQ_API_KEY=$GROQ_API_KEY -``` diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py deleted file mode 100644 index 9e166a288..000000000 --- a/llama_stack/templates/groq/groq.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.remote.inference.groq import GroqConfig -from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::groq"], - "vector_io": ["inline::faiss"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - ], - } - name = "groq" - - inference_provider = Provider( - provider_id=name, - provider_type=f"remote::{name}", - config=GroqConfig.sample_run_config(), - ) - - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - - available_models = { - "groq": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use Groq for running LLM inference", - docker_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMASTACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "GROQ_API_KEY": ( - "", - "Groq API Key", - ), - }, - ) diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml deleted file mode 100644 index 21c8f7e0f..000000000 --- a/llama_stack/templates/groq/run.yaml +++ /dev/null @@ -1,210 +0,0 @@ -version: 2 -image_name: groq -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - 
provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db -models: -- metadata: {} - model_id: groq/llama3-8b-8192 - provider_id: groq - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-3.1-8B-Instruct - provider_id: groq - provider_model_id: groq/llama3-8b-8192 - model_type: llm -- metadata: {} - model_id: groq/llama-3.1-8b-instant - provider_id: groq - provider_model_id: groq/llama-3.1-8b-instant - model_type: llm -- metadata: {} - model_id: groq/llama3-70b-8192 - provider_id: groq - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-3-70B-Instruct - provider_id: groq - provider_model_id: groq/llama3-70b-8192 - model_type: llm -- metadata: {} - model_id: groq/llama-3.3-70b-versatile - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-3.3-70B-Instruct - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: groq/llama-3.2-3b-preview - provider_id: groq - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-3.2-3B-Instruct - provider_id: groq - provider_model_id: groq/llama-3.2-3b-preview - model_type: llm -- metadata: {} - model_id: groq/llama-4-scout-17b-16e-instruct - provider_id: groq - provider_model_id: groq/llama-4-scout-17b-16e-instruct - 
model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq - provider_model_id: groq/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/llama-4-maverick-17b-128e-instruct - provider_id: groq - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq - provider_model_id: groq/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/hf-endpoint/__init__.py b/llama_stack/templates/hf-endpoint/__init__.py deleted file mode 100644 index f2c00e3bf..000000000 --- a/llama_stack/templates/hf-endpoint/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .hf_endpoint import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml deleted file mode 100644 index 9fca9ac22..000000000 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ /dev/null @@ -1,34 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) Hugging Face Inference Endpoint for running LLM inference - providers: - inference: - - remote::hf::endpoint - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py deleted file mode 100644 index 23887469f..000000000 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::hf::endpoint"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "hf-endpoint" - inference_provider = Provider( - provider_id="hf-endpoint", - provider_type="remote::hf::endpoint", - config=InferenceEndpointImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="hf-endpoint", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="hf-endpoint-safety", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", - container_image=None, - template_path=None, - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - embedding_provider, - Provider( - provider_id="hf-endpoint-safety", - provider_type="remote::hf::endpoint", - config=InferenceEndpointImplConfig.sample_run_config( - endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}", - ), - ), - ], - "vector_io": [vector_io_provider], - }, - default_models=[ - inference_model, - safety_model, - embedding_model, - ], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], - default_tool_groups=default_tool_groups, - ), - }, - 
run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "HF_API_TOKEN": ( - "hf_...", - "Hugging Face API token", - ), - "INFERENCE_ENDPOINT_NAME": ( - "", - "HF Inference endpoint name for the main inference model", - ), - "SAFETY_INFERENCE_ENDPOINT_NAME": ( - "", - "HF Inference endpoint for the safety model", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model served by the HF Inference Endpoint", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Safety model served by the HF Inference Endpoint", - ), - }, - ) diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml deleted file mode 100644 index 2ae1d7685..000000000 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ /dev/null @@ -1,142 +0,0 @@ -version: 2 -image_name: hf-endpoint -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: hf-endpoint - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: hf-endpoint-safety - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - 
provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: hf-endpoint - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: hf-endpoint-safety - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml deleted file mode 100644 index 3ec5ae9c1..000000000 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ /dev/null @@ -1,132 +0,0 @@ -version: 2 -image_name: hf-endpoint -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: hf-endpoint - provider_type: remote::hf::endpoint - config: - endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} - api_token: ${env.HF_API_TOKEN} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - 
provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: hf-endpoint - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/hf-serverless/__init__.py b/llama_stack/templates/hf-serverless/__init__.py deleted file mode 100644 index a5f1ab54a..000000000 --- a/llama_stack/templates/hf-serverless/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .hf_serverless import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml deleted file mode 100644 index 214245116..000000000 --- a/llama_stack/templates/hf-serverless/build.yaml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) Hugging Face Inference Endpoint for running LLM inference - providers: - inference: - - remote::hf::serverless - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py deleted file mode 100644 index c58c0921d..000000000 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ /dev/null @@ -1,142 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::hf::serverless", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - - name = "hf-serverless" - inference_provider = Provider( - provider_id="hf-serverless", - provider_type="remote::hf::serverless", - config=InferenceAPIImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="hf-serverless", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="hf-serverless-safety", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", - container_image=None, - template_path=None, - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - embedding_provider, - Provider( - provider_id="hf-serverless-safety", - provider_type="remote::hf::serverless", - config=InferenceAPIImplConfig.sample_run_config( - repo="${env.SAFETY_MODEL}", - ), - ), - ], - "vector_io": [vector_io_provider], - }, - default_models=[ - inference_model, - safety_model, - embedding_model, - ], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "HF_API_TOKEN": ( - 
"hf_...", - "Hugging Face API token", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model to be served by the HF Serverless endpoint", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Safety model to be served by the HF Serverless endpoint", - ), - }, - ) diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml deleted file mode 100644 index 3871b77e7..000000000 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ /dev/null @@ -1,142 +0,0 @@ -version: 2 -image_name: hf-serverless -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: hf-serverless - provider_type: remote::hf::serverless - config: - huggingface_repo: ${env.INFERENCE_MODEL} - api_token: ${env.HF_API_TOKEN} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - - provider_id: hf-serverless-safety - provider_type: remote::hf::serverless - config: - huggingface_repo: ${env.SAFETY_MODEL} - api_token: ${env.HF_API_TOKEN} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: 
{} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: hf-serverless - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: hf-serverless-safety - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml deleted file mode 100644 index 0a5b59400..000000000 --- a/llama_stack/templates/hf-serverless/run.yaml +++ /dev/null @@ -1,132 +0,0 @@ -version: 2 -image_name: hf-serverless -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: hf-serverless - provider_type: remote::hf::serverless - config: - huggingface_repo: ${env.INFERENCE_MODEL} - api_token: ${env.HF_API_TOKEN} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: 
${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: hf-serverless - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/llama_api/__init__.py b/llama_stack/templates/llama_api/__init__.py deleted file mode 100644 index 57cc75730..000000000 --- a/llama_stack/templates/llama_api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .llama_api import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/llama_api/build.yaml b/llama_stack/templates/llama_api/build.yaml deleted file mode 100644 index 44a42594a..000000000 --- a/llama_stack/templates/llama_api/build.yaml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -distribution_spec: - description: Distribution for running e2e tests in CI - providers: - inference: - - remote::llama-openai-compat - - inline::sentence-transformers - vector_io: - - inline::sqlite-vec - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/llama_api/llama_api.py b/llama_stack/templates/llama_api/llama_api.py deleted file mode 100644 index 7631781af..000000000 --- a/llama_stack/templates/llama_api/llama_api.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( - SQLiteVectorIOConfig, -) -from llama_stack.providers.remote.inference.llama_openai_compat.config import ( - LlamaCompatConfig, -) -from llama_stack.providers.remote.inference.llama_openai_compat.models import ( - MODEL_ENTRIES as LLLAMA_MODEL_ENTRIES, -) -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.remote.vector_io.pgvector.config import ( - PGVectorVectorIOConfig, -) -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_inference_providers() -> tuple[list[Provider], list[ModelInput]]: - # in this template, we allow each API key to be optional - providers = [ - ( - "llama-openai-compat", - LLLAMA_MODEL_ENTRIES, - LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+}"), - ), - ] - inference_providers = [] - available_models = {} - for provider_id, model_entries, config in providers: - inference_providers.append( - Provider( - provider_id=provider_id, - provider_type=f"remote::{provider_id}", - config=config, - ) - ) - available_models[provider_id] = model_entries - return inference_providers, available_models - - -def get_distribution_template() -> DistributionTemplate: - inference_providers, available_models = get_inference_providers() - providers = { - "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), - "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "llama_api" - - vector_io_providers = [ - Provider( - provider_id="sqlite-vec", - provider_type="inline::sqlite-vec", - config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ), - Provider( - provider_id="${env.ENABLE_CHROMADB:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), - ), - Provider( - provider_id="${env.ENABLE_PGVECTOR:+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:+}", - user="${env.PGVECTOR_USER:+}", - password="${env.PGVECTOR_PASSWORD:+}", - ), - ), - ] - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id=embedding_provider.provider_id, - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - 
) - - default_models = get_model_registry(available_models) - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Distribution for running e2e tests in CI", - container_image=None, - template_path=None, - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": inference_providers + [embedding_provider], - "vector_io": vector_io_providers, - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - }, - ) diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml deleted file mode 100644 index b627ed2f1..000000000 --- a/llama_stack/templates/llama_api/run.yaml +++ /dev/null @@ -1,168 +0,0 @@ -version: 2 -image_name: llama_api -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: llama-openai-compat - provider_type: remote::llama-openai-compat - config: - openai_compat_api_base: https://api.llama.com/compat/v1/ - api_key: ${env.LLAMA_API_KEY:+} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: sqlite-vec - provider_type: inline::sqlite-vec - config: - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:+} - - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:+} - user: ${env.PGVECTOR_USER:+} - password: ${env.PGVECTOR_PASSWORD:+} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - 
provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db -models: -- metadata: {} - model_id: Llama-3.3-70B-Instruct - provider_id: llama-openai-compat - provider_model_id: Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: llama-openai-compat - provider_model_id: Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: Llama-4-Scout-17B-16E-Instruct-FP8 - provider_id: llama-openai-compat - provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: llama-openai-compat - provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: llama-openai-compat - provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: llama-openai-compat - provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/nvidia/__init__.py b/llama_stack/templates/nvidia/__init__.py deleted file mode 100644 index 24e2fbd21..000000000 --- a/llama_stack/templates/nvidia/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .nvidia import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml deleted file mode 100644 index 51685b2e3..000000000 --- a/llama_stack/templates/nvidia/build.yaml +++ /dev/null @@ -1,29 +0,0 @@ -version: 2 -distribution_spec: - description: Use NVIDIA NIM for running LLM inference, evaluation and safety - providers: - inference: - - remote::nvidia - vector_io: - - inline::faiss - safety: - - remote::nvidia - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - remote::nvidia - post_training: - - remote::nvidia - datasetio: - - inline::localfs - - remote::nvidia - scoring: - - inline::basic - tool_runtime: - - inline::rag-runtime -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md deleted file mode 100644 index 3cb8245df..000000000 --- a/llama_stack/templates/nvidia/doc_template.md +++ /dev/null @@ -1,149 +0,0 @@ -# NVIDIA Distribution - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -## Prerequisites -### NVIDIA API Keys - -Make sure you have access to a NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. - -### Deploy NeMo Microservices Platform -The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. - -## Supported Services -Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. - -### Inference: NVIDIA NIM -NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: - 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (Requires an API key) - 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. - -The deployed platform includes the NIM Proxy microservice, which is the service that provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment. - -### Datasetio API: NeMo Data Store -The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. 
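For example, a minimal sketch of pointing the stock `HfApi` client at the Data Store (this is not part of the original template docs; the endpoint value is a placeholder, the exact path under which your deployment mounts the Hub-compatible API may differ, and auth requirements depend on how the service is deployed):

```python
# Hypothetical sketch: drive the NeMo Data Store with the standard Hugging Face
# Hub client. The endpoint is an assumed placeholder; substitute the
# NVIDIA_DATASETS_URL of your own deployment.
import os

from huggingface_hub import HfApi

datasets_url = os.environ.get("NVIDIA_DATASETS_URL", "http://nemo.test")  # assumed default
api = HfApi(endpoint=datasets_url)

# List dataset repositories known to the Data Store (assumes the service
# implements the corresponding Hub listing route).
for ds in api.list_datasets():
    print(ds.id)
```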
The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. - -See the {repopath}`NVIDIA Datasetio docs::llama_stack/providers/remote/datasetio/nvidia/README.md` for supported features and example usage. - -### Eval API: NeMo Evaluator -The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Eval docs::llama_stack/providers/remote/eval/nvidia/README.md` for supported features and example usage. - -### Post-Training API: NeMo Customizer -The NeMo Customizer microservice supports fine-tuning models. You can reference {repopath}`this list of supported models::llama_stack/providers/remote/post_training/nvidia/models.py` that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Post-Training docs::llama_stack/providers/remote/post_training/nvidia/README.md` for supported features and example usage. - -### Safety API: NeMo Guardrails -The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. - -See the {repopath}`NVIDIA Safety docs::llama_stack/providers/remote/safety/nvidia/README.md` for supported features and example usage. - -## Deploying models -In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. - -Note: For improved inference speeds, we need to use NIM with `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. -```sh -# URL to NeMo NIM Proxy service -export NEMO_URL="http://nemo.test" - -curl --location "$NEMO_URL/v1/deployment/model-deployments" \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "name": "llama-3.2-1b-instruct", - "namespace": "meta", - "config": { - "model": "meta/llama-3.2-1b-instruct", - "nim_deployment": { - "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", - "image_tag": "1.8.3", - "pvc_size": "25Gi", - "gpu": 1, - "additional_envs": { - "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" - } - } - } - }' -``` -This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference. - -You can also remove a deployed NIM to free up GPU resources, if needed. -```sh -export NEMO_URL="http://nemo.test" - -curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" -``` - -## Running Llama Stack with NVIDIA - -You can do this via Conda or venv (build code), or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
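Before launching the container, it can help to confirm that your `NVIDIA_API_KEY` is accepted by the NIM endpoint. A small pre-flight sketch (not from the original docs; it assumes the hosted endpoint described above is OpenAI-compatible, and self-hosted NIM Proxy deployments would use their own `NVIDIA_BASE_URL`):

```python
# Hypothetical pre-flight check: list the models visible to your API key.
# The /v1/models route is assumed from the endpoint's OpenAI compatibility.
import os

import requests

base_url = os.environ.get("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com")
headers = {"Authorization": f"Bearer {os.environ['NVIDIA_API_KEY']}"}

resp = requests.get(f"{base_url}/v1/models", headers=headers, timeout=10)
resp.raise_for_status()
print([m["id"] for m in resp.json().get("data", [])][:5])
```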
- -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY -``` - -### Via Conda - -```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct -llama stack build --template nvidia --image-type conda -llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL -``` - -### Via venv - -If you've set up your local development environment, you can also build the image using your local virtual environment. - -```bash -INFERENCE_MODEL=meta-llama/Llama-3.1-8b-Instruct -llama stack build --template nvidia --image-type venv -llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL -``` - -## Example Notebooks -For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in {repopath}`docs/notebooks/nvidia`. diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py deleted file mode 100644 index 4eccfb25c..000000000 --- a/llama_stack/templates/nvidia/nvidia.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput -from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig -from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig -from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig -from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES -from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::nvidia"], - "vector_io": ["inline::faiss"], - "safety": ["remote::nvidia"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["remote::nvidia"], - "post_training": ["remote::nvidia"], - "datasetio": ["inline::localfs", "remote::nvidia"], - "scoring": ["inline::basic"], - "tool_runtime": ["inline::rag-runtime"], - } - - inference_provider = Provider( - provider_id="nvidia", - provider_type="remote::nvidia", - config=NVIDIAConfig.sample_run_config(), - ) - safety_provider = Provider( - provider_id="nvidia", - provider_type="remote::nvidia", - config=NVIDIASafetyConfig.sample_run_config(), - ) - datasetio_provider = Provider( - provider_id="nvidia", - provider_type="remote::nvidia", - config=NvidiaDatasetIOConfig.sample_run_config(), - ) - eval_provider = Provider( - provider_id="nvidia", - provider_type="remote::nvidia", - config=NVIDIAEvalConfig.sample_run_config(), - ) - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="nvidia", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="nvidia", - ) - - available_models = { - "nvidia": MODEL_ENTRIES, - } - default_tool_groups = [ - ToolGroupInput( - 
toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - default_models = get_model_registry(available_models) - return DistributionTemplate( - name="nvidia", - distro_type="self_hosted", - description="Use NVIDIA NIM for running LLM inference, evaluation and safety", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider], - "datasetio": [datasetio_provider], - "eval": [eval_provider], - }, - default_models=default_models, - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - safety_provider, - ], - "eval": [eval_provider], - }, - default_models=[inference_model, safety_model], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "NVIDIA_API_KEY": ( - "", - "NVIDIA API Key", - ), - "NVIDIA_APPEND_API_VERSION": ( - "True", - "Whether to append the API version to the base_url", - ), - ## Nemo Customizer related variables - "NVIDIA_DATASET_NAMESPACE": ( - "default", - "NVIDIA Dataset Namespace", - ), - "NVIDIA_PROJECT_ID": ( - "test-project", - "NVIDIA Project ID", - ), - "NVIDIA_CUSTOMIZER_URL": ( - "https://customizer.api.nvidia.com", - "NVIDIA Customizer URL", - ), - "NVIDIA_OUTPUT_MODEL_DIR": ( - "test-example-model@v1", - "NVIDIA Output Model Directory", - ), - "GUARDRAILS_SERVICE_URL": ( - "http://0.0.0.0:7331", - "URL for the NeMo Guardrails Service", - ), - "NVIDIA_GUARDRAILS_CONFIG_ID": ( - "self-check", - "NVIDIA Guardrail Configuration ID", - ), - "NVIDIA_EVALUATOR_URL": ( - "http://0.0.0.0:7331", - "URL for the NeMo Evaluator Service", - ), - "INFERENCE_MODEL": ( - "Llama3.1-8B-Instruct", - "Inference model", - ), - "SAFETY_MODEL": ( - "meta/llama-3.1-8b-instruct", - "Name of the model to use for safety", - ), - }, - ) diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml deleted file mode 100644 index 875fccc9d..000000000 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ /dev/null @@ -1,121 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:+} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:+} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:+} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: nvidia - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: nvidia - model_type: llm -shields: -- shield_id: ${env.SAFETY_MODEL} - provider_id: nvidia -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml deleted file mode 100644 index 4477d5244..000000000 --- a/llama_stack/templates/nvidia/run.yaml +++ /dev/null @@ -1,227 +0,0 @@ -version: 2 -image_name: nvidia -apis: -- agents -- datasetio -- eval -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: nvidia - provider_type: remote::nvidia - config: - url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} - api_key: ${env.NVIDIA_API_KEY:+} - append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db - safety: - - provider_id: nvidia - provider_type: remote::nvidia - config: - guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} - config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db - responses_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/trace_store.db - eval: - - provider_id: nvidia - provider_type: remote::nvidia - config: - evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - post_training: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:+} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - datasetio: - - provider_id: nvidia - provider_type: remote::nvidia - config: - api_key: ${env.NVIDIA_API_KEY:+} - dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} - project_id: ${env.NVIDIA_PROJECT_ID:=test-project} - datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - tool_runtime: - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db -models: -- metadata: {} - model_id: meta/llama3-8b-instruct - provider_id: nvidia - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3-8B-Instruct - provider_id: nvidia - provider_model_id: meta/llama3-8b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama3-70b-instruct - provider_id: nvidia - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.1-8b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.1-70b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-70b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.1-405b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: nvidia - provider_model_id: meta/llama-3.1-405b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.2-1b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.2-3b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-3b-instruct - model_type: llm -- metadata: {} 
- model_id: meta/llama-3.2-11b-vision-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-11b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.2-90b-vision-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.2-90b-vision-instruct - model_type: llm -- metadata: {} - model_id: meta/llama-3.3-70b-instruct - provider_id: nvidia - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: nvidia - provider_model_id: meta/llama-3.3-70b-instruct - model_type: llm -- metadata: - embedding_dimension: 2048 - context_length: 8192 - model_id: nvidia/llama-3.2-nv-embedqa-1b-v2 - provider_id: nvidia - provider_model_id: nvidia/llama-3.2-nv-embedqa-1b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: nvidia/nv-embedqa-e5-v5 - provider_id: nvidia - provider_model_id: nvidia/nv-embedqa-e5-v5 - model_type: embedding -- metadata: - embedding_dimension: 4096 - context_length: 512 - model_id: nvidia/nv-embedqa-mistral-7b-v2 - provider_id: nvidia - provider_model_id: nvidia/nv-embedqa-mistral-7b-v2 - model_type: embedding -- metadata: - embedding_dimension: 1024 - context_length: 512 - model_id: snowflake/arctic-embed-l - provider_id: nvidia - provider_model_id: snowflake/arctic-embed-l - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/ollama/__init__.py b/llama_stack/templates/ollama/__init__.py deleted file mode 100644 index 3a2c40f27..000000000 --- a/llama_stack/templates/ollama/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .ollama import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml deleted file mode 100644 index cbf4281a2..000000000 --- a/llama_stack/templates/ollama/build.yaml +++ /dev/null @@ -1,39 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) Ollama server for running LLM inference - providers: - inference: - - remote::ollama - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - files: - - inline::localfs - post_training: - - inline::huggingface - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol - - remote::wolfram-alpha -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md deleted file mode 100644 index aaa65bab2..000000000 --- a/llama_stack/templates/ollama/doc_template.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -orphan: true ---- -# Ollama Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration. - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - - -## Setting up Ollama server - -Please check the [Ollama Documentation](https://github.com/ollama/ollama) on how to install and run Ollama. After installing Ollama, you need to run `ollama serve` to start the server. - -In order to load models, you can run: - -```bash -export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" - -# ollama names this model differently, and we must use the ollama name when loading the model -export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" -ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m -``` - -If you are using Llama Stack Safety / Shield APIs, you will also need to pull and run the safety model. - -```bash -export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B" - -# ollama names this model differently, and we must use the ollama name when loading the model -export OLLAMA_SAFETY_MODEL="llama-guard3:1b" -ollama run $OLLAMA_SAFETY_MODEL --keepalive 60m -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with Ollama as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
- -```bash -export LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. - -```bash -export LLAMA_STACK_PORT=8321 - -llama stack build --template {{ name }} --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://localhost:11434 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=http://localhost:11434 -``` - - -### (Optional) Update Model Serving Configuration - -```{note} -Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/models.py) for the supported Ollama models. -``` - -To serve a new model with `ollama` -```bash -ollama run -``` - -To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama. -``` -$ ollama ps -NAME ID SIZE PROCESSOR UNTIL -llama3.2:3b-instruct-fp16 195a8c01d91e 8.6 GB 100% GPU 9 minutes from now -``` - -To verify that the model served by ollama is correctly connected to Llama Stack server -```bash -$ llama-stack-client models list - -Available Models - -┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ -┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ -┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ -│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ -└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ - -Total models: 1 -``` diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py deleted file mode 100644 index cba25296b..000000000 --- a/llama_stack/templates/ollama/ollama.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig -from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.ollama import OllamaImplConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::ollama"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "files": ["inline::localfs"], - "post_training": ["inline::huggingface"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - "remote::wolfram-alpha", - ], - } - name = "ollama" - inference_provider = Provider( - provider_id="ollama", - provider_type="remote::ollama", - config=OllamaImplConfig.sample_run_config(), - ) - vector_io_provider_faiss = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - files_provider = Provider( - provider_id="meta-reference-files", - provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - posttraining_provider = Provider( - provider_id="huggingface", - provider_type="inline::huggingface", - config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="ollama", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="ollama", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="ollama", - provider_model_id="all-minilm:latest", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use (an external) Ollama server for running LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider], - "vector_io": [vector_io_provider_faiss], - "files": [files_provider], - "post_training": [posttraining_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider], - "vector_io": [vector_io_provider_faiss], - "files": [files_provider], - 
"post_training": [posttraining_provider], - "safety": [ - Provider( - provider_id="llama-guard", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="code-scanner", - provider_type="inline::code-scanner", - config={}, - ), - ], - }, - default_models=[ - inference_model, - safety_model, - embedding_model, - ], - default_shields=[ - ShieldInput( - shield_id="${env.SAFETY_MODEL}", - provider_id="llama-guard", - ), - ShieldInput( - shield_id="CodeScanner", - provider_id="code-scanner", - ), - ], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "OLLAMA_URL": ( - "http://127.0.0.1:11434", - "URL of the Ollama server", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model loaded into the Ollama server", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Safety model loaded into the Ollama server", - ), - }, - ) diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml deleted file mode 100644 index 5e906a12c..000000000 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ /dev/null @@ -1,163 +0,0 @@ -version: 2 -image_name: ollama -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - raise_on_connect_error: true - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - - provider_id: code-scanner - provider_type: inline::code-scanner - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - files: - - provider_id: 
meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db - post_training: - - provider_id: huggingface - provider_type: inline::huggingface - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: ollama - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: ollama - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} - provider_id: llama-guard -- shield_id: CodeScanner - provider_id: code-scanner -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml deleted file mode 100644 index d2b4e3978..000000000 --- a/llama_stack/templates/ollama/run.yaml +++ /dev/null @@ -1,153 +0,0 @@ -version: 2 -image_name: ollama -apis: -- agents -- datasetio -- eval -- files -- inference -- post_training -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL:=http://localhost:11434} - raise_on_connect_error: true - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db - eval: - - provider_id: meta-reference - 
provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - files: - - provider_id: meta-reference-files - provider_type: inline::localfs - config: - storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} - metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db - post_training: - - provider_id: huggingface - provider_type: inline::huggingface - config: - checkpoint_format: huggingface - distributed_backend: null - device: cpu - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: ollama - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: ollama - provider_model_id: all-minilm:latest - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/passthrough/__init__.py b/llama_stack/templates/passthrough/__init__.py deleted file mode 100644 index 9632c09fb..000000000 --- a/llama_stack/templates/passthrough/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .passthrough import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml deleted file mode 100644 index e2e041dbc..000000000 --- a/llama_stack/templates/passthrough/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -version: 2 -distribution_spec: - description: Use Passthrough hosted llama-stack endpoint for LLM inference - providers: - inference: - - remote::passthrough - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - remote::wolfram-alpha - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/passthrough/doc_template.md b/llama_stack/templates/passthrough/doc_template.md deleted file mode 100644 index f9e88873d..000000000 --- a/llama_stack/templates/passthrough/doc_template.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -orphan: true ---- -# Passthrough Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} diff --git a/llama_stack/templates/passthrough/passthrough.py b/llama_stack/templates/passthrough/passthrough.py deleted file mode 100644 index 1b94a9aae..000000000 --- a/llama_stack/templates/passthrough/passthrough.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.passthrough.config import ( - PassthroughImplConfig, -) -from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::passthrough", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "remote::wolfram-alpha", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - - name = "passthrough" - - inference_provider = Provider( - provider_id="passthrough", - provider_type="remote::passthrough", - config=PassthroughImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - default_models = [ - ModelInput( - metadata={}, - model_id="meta-llama/Llama-3.1-8B-Instruct", - provider_id="passthrough", - provider_model_id="llama3.1-8b-instruct", - model_type=ModelType.llm, - ), - ModelInput( - metadata={}, - model_id="meta-llama/Llama-3.2-11B-Vision-Instruct", - provider_id="passthrough", - provider_model_id="llama3.2-11b-vision-instruct", - model_type=ModelType.llm, - ), - ] - - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use Passthrough hosted llama-stack endpoint for LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider={ - "passthrough": [ - ProviderModelEntry( - provider_model_id="llama3.1-8b-instruct", - model_type=ModelType.llm, - ), - ProviderModelEntry( - provider_model_id="llama3.2-11b-vision-instruct", - model_type=ModelType.llm, - ), - ], - }, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=default_models + [embedding_model], - 
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - embedding_provider, - ], - "vector_io": [vector_io_provider], - "safety": [ - Provider( - provider_id="llama-guard", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="llama-guard-vision", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="code-scanner", - provider_type="inline::code-scanner", - config={}, - ), - ], - }, - default_models=[ - *default_models, - embedding_model, - ], - default_shields=[ - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-8B", - provider_id="llama-guard", - ), - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-11B-Vision", - provider_id="llama-guard-vision", - ), - ShieldInput( - shield_id="CodeScanner", - provider_id="code-scanner", - ), - ], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "PASSTHROUGH_API_KEY": ( - "", - "Passthrough API Key", - ), - "PASSTHROUGH_URL": ( - "", - "Passthrough URL", - ), - }, - ) diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml deleted file mode 100644 index c5b047511..000000000 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ /dev/null @@ -1,155 +0,0 @@ -version: 2 -image_name: passthrough -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: passthrough - provider_type: remote::passthrough - config: - url: ${env.PASSTHROUGH_URL} - api_key: ${env.PASSTHROUGH_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - - provider_id: llama-guard-vision - provider_type: inline::llama-guard - config: {} - - provider_id: code-scanner - provider_type: inline::code-scanner - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - - provider_id: localfs - 
provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: passthrough - provider_model_id: llama3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: passthrough - provider_model_id: llama3.2-11b-vision-instruct - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B - provider_id: llama-guard -- shield_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: llama-guard-vision -- shield_id: CodeScanner - provider_id: code-scanner -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml deleted file mode 100644 index 896b3c91e..000000000 --- a/llama_stack/templates/passthrough/run.yaml +++ /dev/null @@ -1,145 +0,0 @@ -version: 2 -image_name: passthrough -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: passthrough - provider_type: remote::passthrough - config: - url: ${env.PASSTHROUGH_URL} - api_key: ${env.PASSTHROUGH_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db - responses_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: passthrough - provider_model_id: llama3.1-8b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: passthrough - provider_model_id: llama3.2-11b-vision-instruct - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/remote-vllm/__init__.py b/llama_stack/templates/remote-vllm/__init__.py deleted file mode 100644 index 7b3d59a01..000000000 --- a/llama_stack/templates/remote-vllm/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .vllm import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml deleted file mode 100644 index 0298b01c7..000000000 --- a/llama_stack/templates/remote-vllm/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) vLLM server for running LLM inference - providers: - inference: - - remote::vllm - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - telemetry: - - inline::meta-reference - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol - - remote::wolfram-alpha -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md deleted file mode 100644 index 5684888da..000000000 --- a/llama_stack/templates/remote-vllm/doc_template.md +++ /dev/null @@ -1,284 +0,0 @@ ---- -orphan: true ---- -# Remote vLLM Distribution -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations: - -{{ providers_table }} - -You can use this distribution if you want to run an independent vLLM server for inference. - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - - -## Setting up vLLM server - -In the following sections, we'll use AMD, NVIDIA or Intel GPUs to serve as hardware accelerators for the vLLM -server, which acts as both the LLM inference provider and the safety provider. Note that vLLM also -[supports many other hardware accelerators](https://docs.vllm.ai/en/latest/getting_started/installation.html) and -that we only use GPUs here for demonstration purposes. Note that if you run into issues, you can include the environment variable `--env VLLM_DEBUG_LOG_API_SERVER_RESPONSE=true` (available in vLLM v0.8.3 and above) in the `docker run` command to enable log response from API server for debugging. - -### Setting up vLLM server on AMD GPU - -AMD provides two main vLLM container options: -- rocm/vllm: Production-ready container -- rocm/vllm-dev: Development container with the latest vLLM features - -Please check the [Blog about ROCm vLLM Usage](https://rocm.blogs.amd.com/software-tools-optimization/vllm-container/README.html) to get more details. 
- -Here is a sample script to start a ROCm vLLM server locally via Docker: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 -export VLLM_DIMG="rocm/vllm-dev:main" - -docker run \ - --pull always \ - --ipc=host \ - --privileged \ - --shm-size 16g \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --cap-add=SYS_PTRACE \ - --cap-add=CAP_SYS_ADMIN \ - --security-opt seccomp=unconfined \ - --security-opt apparmor=unconfined \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - $VLLM_DIMG \ - python -m vllm.entrypoints.openai.api_server \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 -export VLLM_DIMG="rocm/vllm-dev:main" - -docker run \ - --pull always \ - --ipc=host \ - --privileged \ - --shm-size 16g \ - --device=/dev/kfd \ - --device=/dev/dri \ - --group-add video \ - --cap-add=SYS_PTRACE \ - --cap-add=CAP_SYS_ADMIN \ - --security-opt seccomp=unconfined \ - --security-opt apparmor=unconfined \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env "HIP_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" \ - -p $SAFETY_PORT:$SAFETY_PORT \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - $VLLM_DIMG \ - python -m vllm.entrypoints.openai.api_server \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -### Setting up vLLM server on NVIDIA GPU - -Please check the [vLLM Documentation](https://docs.vllm.ai/en/v0.5.5/serving/deploying_with_docker.html) to get a vLLM endpoint. Here is a sample script to start a vLLM server locally via Docker: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 - -docker run \ - --pull always \ - --runtime nvidia \ - --gpus $CUDA_VISIBLE_DEVICES \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --ipc=host \ - vllm/vllm-openai:latest \ - --gpu-memory-utilization 0.7 \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -Note that you'll also need to set `--enable-auto-tool-choice` and `--tool-call-parser` to [enable tool calling in vLLM](https://docs.vllm.ai/en/latest/features/tool_calling.html). 
- -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 - -docker run \ - --pull always \ - --runtime nvidia \ - --gpus $CUDA_VISIBLE_DEVICES \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --ipc=host \ - vllm/vllm-openai:latest \ - --gpu-memory-utilization 0.7 \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -### Setting up vLLM server on Intel GPU - -Refer to [vLLM Documentation for XPU](https://docs.vllm.ai/en/v0.8.2/getting_started/installation/gpu.html?device=xpu) to get a vLLM endpoint. In addition to vLLM side setup which guides towards installing vLLM from sources orself-building vLLM Docker container, Intel provides prebuilt vLLM container to use on systems with Intel GPUs supported by PyTorch XPU backend: -- [intel/vllm](https://hub.docker.com/r/intel/vllm) - -Here is a sample script to start a vLLM server locally via Docker using Intel provided container: - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct -export ZE_AFFINITY_MASK=0 - -docker run \ - --pull always \ - --device /dev/dri \ - -v /dev/dri/by-path:/dev/dri/by-path \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --ipc=host \ - intel/vllm:xpu \ - --gpu-memory-utilization 0.7 \ - --model $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a vLLM with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export ZE_AFFINITY_MASK=1 - -docker run \ - --pull always \ - --device /dev/dri \ - -v /dev/dri/by-path:/dev/dri/by-path \ - -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ - --env ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --ipc=host \ - intel/vllm:xpu \ - --gpu-memory-utilization 0.7 \ - --model $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with vLLM as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. 
- -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export LLAMA_STACK_PORT=8321 - -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B - -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env SAFETY_VLLM_URL=http://host.docker.internal:$SAFETY_PORT/v1 -``` - - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. - -```bash -export INFERENCE_PORT=8000 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export LLAMA_STACK_PORT=8321 - -cd distributions/remote-vllm -llama stack build --template remote-vllm --image-type conda - -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://localhost:$INFERENCE_PORT/v1 -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B - -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env VLLM_URL=http://localhost:$INFERENCE_PORT/v1 \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env SAFETY_VLLM_URL=http://localhost:$SAFETY_PORT/v1 -``` diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml deleted file mode 100644 index e306a771b..000000000 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ /dev/null @@ -1,152 +0,0 @@ -version: 2 -image_name: remote-vllm -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: vllm-safety - provider_type: remote::vllm - config: - url: ${env.SAFETY_VLLM_URL} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: vllm-safety - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml deleted file mode 100644 index 1dbef96a2..000000000 --- a/llama_stack/templates/remote-vllm/run.yaml +++ /dev/null @@ -1,140 +0,0 @@ -version: 2 -image_name: remote-vllm -apis: -- agents -- 
datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: 
builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py deleted file mode 100644 index a8e1d9a58..000000000 --- a/llama_stack/templates/remote-vllm/vllm.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::vllm", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "telemetry": ["inline::meta-reference"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - "remote::wolfram-alpha", - ], - } - name = "remote-vllm" - inference_provider = Provider( - provider_id="vllm-inference", - provider_type="remote::vllm", - config=VLLMInferenceAdapterConfig.sample_run_config( - url="${env.VLLM_URL:=http://localhost:8000/v1}", - ), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="vllm-inference", - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="vllm-safety", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use (an external) vLLM server for running LLM inference", - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": 
[vector_io_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - Provider( - provider_id="vllm-safety", - provider_type="remote::vllm", - config=VLLMInferenceAdapterConfig.sample_run_config( - url="${env.SAFETY_VLLM_URL}", - ), - ), - embedding_provider, - ], - "vector_io": [vector_io_provider], - }, - default_models=[ - inference_model, - safety_model, - embedding_model, - ], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model loaded into the vLLM server", - ), - "VLLM_URL": ( - "http://host.docker.internal:5100/v1", - "URL of the vLLM server with the main inference model", - ), - "MAX_TOKENS": ( - "4096", - "Maximum number of tokens for generation", - ), - "SAFETY_VLLM_URL": ( - "http://host.docker.internal:5101/v1", - "URL of the vLLM server with the safety model", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Name of the safety (Llama-Guard) model to use", - ), - }, - ) diff --git a/llama_stack/templates/sambanova/__init__.py b/llama_stack/templates/sambanova/__init__.py deleted file mode 100644 index 30209fb7f..000000000 --- a/llama_stack/templates/sambanova/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .sambanova import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml deleted file mode 100644 index ba70f88c6..000000000 --- a/llama_stack/templates/sambanova/build.yaml +++ /dev/null @@ -1,27 +0,0 @@ -version: 2 -distribution_spec: - description: Use SambaNova for running LLM inference and safety - providers: - inference: - - remote::sambanova - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - remote::sambanova - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol - - remote::wolfram-alpha -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md deleted file mode 100644 index 1dc76fd3f..000000000 --- a/llama_stack/templates/sambanova/doc_template.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -orphan: true ---- -# SambaNova Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. 
- -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup). - - -## Running Llama Stack with SambaNova - -You can do this via Conda (build code) or Docker which has a pre-built image. - - -### Via Docker - -```bash -LLAMA_STACK_PORT=8321 -llama stack build --template sambanova --image-type container -docker run \ - -it \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` - - -### Via Venv - -```bash -llama stack build --template sambanova --image-type venv -llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` - - -### Via Conda - -```bash -llama stack build --template sambanova --image-type conda -llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY -``` diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml deleted file mode 100644 index b96621b58..000000000 --- a/llama_stack/templates/sambanova/run.yaml +++ /dev/null @@ -1,214 +0,0 @@ -version: 2 -image_name: sambanova -apis: -- agents -- inference -- safety -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:+} - - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} - provider_type: remote::pgvector - config: - host: ${env.PGVECTOR_HOST:=localhost} - port: ${env.PGVECTOR_PORT:=5432} - db: ${env.PGVECTOR_DB:+} - user: ${env.PGVECTOR_USER:+} - password: ${env.PGVECTOR_PASSWORD:+} - safety: - - provider_id: sambanova - provider_type: remote::sambanova - config: - url: https://api.sambanova.ai/v1 - api_key: ${env.SAMBANOVA_API_KEY} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - 
sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db -models: -- metadata: {} - model_id: sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: 
meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova - provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - model_type: llm -- metadata: {} - model_id: sambanova/Meta-Llama-Guard-3-8B - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: sambanova - provider_model_id: sambanova/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B - provider_shield_id: sambanova/Meta-Llama-Guard-3-8B -- shield_id: sambanova/Meta-Llama-Guard-3-8B - provider_shield_id: sambanova/Meta-Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py deleted file mode 100644 index 428577697..000000000 --- a/llama_stack/templates/sambanova/sambanova.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig -from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES -from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig -from llama_stack.providers.remote.vector_io.pgvector.config import ( - PGVectorVectorIOConfig, -) -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::sambanova", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["remote::sambanova"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - "remote::wolfram-alpha", - ], - } - name = "sambanova" - inference_provider = Provider( - provider_id=name, - provider_type=f"remote::{name}", - config=SambaNovaImplConfig.sample_run_config(), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - vector_io_providers = [ - Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config( - __distro_dir__=f"~/.llama/distributions/{name}", - ), - ), - Provider( - provider_id="${env.ENABLE_CHROMADB:+chromadb}", - provider_type="remote::chromadb", - config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), - ), - Provider( - provider_id="${env.ENABLE_PGVECTOR:+pgvector}", - provider_type="remote::pgvector", - config=PGVectorVectorIOConfig.sample_run_config( - db="${env.PGVECTOR_DB:+}", - user="${env.PGVECTOR_USER:+}", - password="${env.PGVECTOR_PASSWORD:+}", - ), - ), - ] - - available_models = { - name: MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use SambaNova for running LLM inference and safety", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": vector_io_providers, - }, - default_models=default_models + [embedding_model], - default_shields=[ - ShieldInput( - 
shield_id="meta-llama/Llama-Guard-3-8B", provider_shield_id="sambanova/Meta-Llama-Guard-3-8B" - ), - ShieldInput( - shield_id="sambanova/Meta-Llama-Guard-3-8B", - provider_shield_id="sambanova/Meta-Llama-Guard-3-8B", - ), - ], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMASTACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "SAMBANOVA_API_KEY": ( - "", - "SambaNova API Key", - ), - }, - ) diff --git a/llama_stack/templates/starter/build.yaml b/llama_stack/templates/starter/build.yaml index 3b48dcf7a..0f61ea91e 100644 --- a/llama_stack/templates/starter/build.yaml +++ b/llama_stack/templates/starter/build.yaml @@ -12,6 +12,14 @@ distribution_spec: - remote::groq - remote::sambanova - remote::vllm + - remote::tgi + - remote::cerebras + - remote::llama-openai-compat + - remote::nvidia + - remote::hf::serverless + - remote::hf::endpoint + - remote::bedrock + - remote::passthrough - inline::sentence-transformers vector_io: - inline::sqlite-vec @@ -25,6 +33,8 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + post_training: + - inline::huggingface eval: - inline::meta-reference datasetio: diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 00faf029e..fbc2c829a 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -6,6 +6,7 @@ apis: - eval - files - inference +- post_training - safety - scoring - telemetry @@ -13,70 +14,107 @@ apis: - vector_io providers: inference: - - provider_id: openai + - provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_type: remote::openai config: api_key: ${env.OPENAI_API_KEY:+} - - provider_id: fireworks + - provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_type: remote::fireworks config: url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:+} - - provider_id: together + - provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_type: remote::together config: url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:+} - - provider_id: ollama + - provider_id: ${env.ENABLE_OLLAMA:=__disabled__} provider_type: remote::ollama config: url: ${env.OLLAMA_URL:=http://localhost:11434} - raise_on_connect_error: false - - provider_id: anthropic + - provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_type: remote::anthropic config: api_key: ${env.ANTHROPIC_API_KEY:+} - - provider_id: gemini + - provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_type: remote::gemini config: api_key: ${env.GEMINI_API_KEY:+} - - provider_id: groq + - provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_type: remote::groq config: url: https://api.groq.com api_key: ${env.GROQ_API_KEY:+} - - provider_id: sambanova + - provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_type: remote::sambanova config: url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:+} - - provider_id: vllm + - provider_id: ${env.ENABLE_VLLM:=__disabled__} provider_type: remote::vllm config: url: ${env.VLLM_URL:=http://localhost:8000/v1} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers + - provider_id: ${env.ENABLE_TGI:=__disabled__} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:+} + - provider_id: ${env.ENABLE_CEREBRAS:=__disabled__} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: 
${env.CEREBRAS_API_KEY:+} + - provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__} + provider_type: remote::llama-openai-compat + config: + openai_compat_api_base: https://api.llama.com/compat/v1/ + api_key: ${env.LLAMA_API_KEY:+:} + - provider_id: ${env.ENABLE_NVIDIA:=__disabled__} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:__disabled__} + api_key: ${env.NVIDIA_API_KEY:+} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__} + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.INFERENCE_MODEL:+:} + api_token: ${env.HF_API_TOKEN:+:} + - provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__} + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.INFERENCE_ENDPOINT_NAME:+:} + api_token: ${env.HF_API_TOKEN:+:} + - provider_id: ${env.ENABLE_BEDROCK:=__disabled__} + provider_type: remote::bedrock + config: {} + - provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__} + provider_type: remote::passthrough + config: + url: ${env.PASSTHROUGH_URL:+:} + api_key: ${env.PASSTHROUGH_API_KEY:+:} + - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} provider_type: inline::sentence-transformers config: {} vector_io: - - provider_id: faiss + - provider_id: ${env.ENABLE_FAISS:=faiss} provider_type: inline::faiss config: kvstore: type: sqlite namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} + - provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__} provider_type: inline::sqlite-vec config: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + - provider_id: ${env.ENABLE_CHROMADB:=__disabled__} provider_type: remote::chromadb config: url: ${env.CHROMADB_URL:+} - - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} + - provider_id: ${env.ENABLE_PGVECTOR:=__disabled__} provider_type: remote::pgvector config: host: ${env.PGVECTOR_HOST:=localhost} @@ -115,6 +153,13 @@ providers: service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" sinks: ${env.TELEMETRY_SINKS:=console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu eval: - provider_id: meta-reference provider_type: inline::meta-reference @@ -174,645 +219,649 @@ inference_store: db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db models: - metadata: {} - model_id: openai/gpt-4o - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/gpt-4o + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: openai/gpt-4o model_type: llm - metadata: {} - model_id: openai/gpt-4o-mini - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/gpt-4o-mini + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: openai/gpt-4o-mini model_type: llm - metadata: {} - model_id: openai/chatgpt-4o-latest - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/chatgpt-4o-latest + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: openai/chatgpt-4o-latest model_type: llm - metadata: {} - model_id: openai/gpt-3.5-turbo-0125 - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-0125 + provider_id: 
${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-3.5-turbo-0125 model_type: llm - metadata: {} - model_id: openai/gpt-3.5-turbo - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-3.5-turbo model_type: llm - metadata: {} - model_id: openai/gpt-3.5-turbo-instruct - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-3.5-turbo-instruct + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-3.5-turbo-instruct model_type: llm - metadata: {} - model_id: openai/gpt-4 - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4 + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4 model_type: llm - metadata: {} - model_id: openai/gpt-4-turbo - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4-turbo + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4-turbo model_type: llm - metadata: {} - model_id: openai/gpt-4o - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4o model_type: llm - metadata: {} - model_id: openai/gpt-4o-2024-08-06 - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-2024-08-06 + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4o-2024-08-06 model_type: llm - metadata: {} - model_id: openai/gpt-4o-mini - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-mini + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4o-mini model_type: llm - metadata: {} - model_id: openai/gpt-4o-audio-preview - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/gpt-4o-audio-preview + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: gpt-4o-audio-preview model_type: llm - metadata: {} - model_id: openai/chatgpt-4o-latest - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/chatgpt-4o-latest + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: chatgpt-4o-latest model_type: llm - metadata: {} - model_id: openai/o1 - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1 + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: o1 model_type: llm - metadata: {} - model_id: openai/o1-mini - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/o1-mini + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: o1-mini model_type: llm - metadata: {} - model_id: openai/o3-mini - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/o3-mini + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: o3-mini model_type: llm - metadata: {} - model_id: openai/o4-mini - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/o4-mini + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: o4-mini model_type: llm - metadata: embedding_dimension: 1536 context_length: 8192 - model_id: openai/text-embedding-3-small - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/text-embedding-3-small + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: openai/text-embedding-3-small model_type: embedding - metadata: embedding_dimension: 3072 context_length: 8192 - model_id: openai/text-embedding-3-large - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/openai/text-embedding-3-large + provider_id: 
${env.ENABLE_OPENAI:=__disabled__} provider_model_id: openai/text-embedding-3-large model_type: embedding - metadata: embedding_dimension: 1536 context_length: 8192 - model_id: openai/text-embedding-3-small - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-small + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: text-embedding-3-small model_type: embedding - metadata: embedding_dimension: 3072 context_length: 8192 - model_id: openai/text-embedding-3-large - provider_id: openai + model_id: ${env.ENABLE_OPENAI:=__disabled__}/text-embedding-3-large + provider_id: ${env.ENABLE_OPENAI:=__disabled__} provider_model_id: text-embedding-3-large model_type: embedding - metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-8b-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.1-8B-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-70b-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.1-70B-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p1-405b-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-3b-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.2-3B-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct - provider_id: fireworks + model_id: 
${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-3.3-70B-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-8b - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-8b + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-Guard-3-8B - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-8B + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama-guard-3-11b-vision - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-Guard-3-11B-Vision - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama4-scout-instruct-basic - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: 
accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic model_type: llm - metadata: {} - model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: {} - model_id: fireworks/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 - model_id: fireworks/nomic-ai/nomic-embed-text-v1.5 - provider_id: fireworks + model_id: ${env.ENABLE_FIREWORKS:=__disabled__}/nomic-ai/nomic-embed-text-v1.5 + provider_id: ${env.ENABLE_FIREWORKS:=__disabled__} provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding - metadata: {} - model_id: together/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.1-8B-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.1-70B-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-70B-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: together + model_id: 
${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-3B-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-3.3-70B-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Meta-Llama-Guard-3-8B - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Meta-Llama-Guard-3-8B + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-Guard-3-8B - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-8B + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-Guard-3-11B-Vision - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-Guard-3-11B-Vision + provider_id: 
${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 - model_id: togethercomputer/m2-bert-80M-8k-retrieval - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-8k-retrieval + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval model_type: embedding - metadata: embedding_dimension: 768 context_length: 32768 - model_id: togethercomputer/m2-bert-80M-32k-retrieval - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/togethercomputer/m2-bert-80M-32k-retrieval + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval model_type: embedding - metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together + model_id: ${env.ENABLE_TOGETHER:=__disabled__}/together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: ${env.ENABLE_TOGETHER:=__disabled__} provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 model_type: llm - metadata: {} - model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} - provider_id: ollama + model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_INFERENCE_MODEL:=__disabled__} + provider_id: ${env.ENABLE_OLLAMA:=__disabled__} provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:=384} - model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} - provider_id: ollama + model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} + provider_id: ${env.ENABLE_OLLAMA:=__disabled__} provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:=__disabled__} model_type: 
embedding - metadata: {} - model_id: anthropic/claude-3-5-sonnet-latest - provider_id: anthropic + model_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.OLLAMA_SAFETY_MODEL:=__disabled__} + provider_id: ${env.ENABLE_OLLAMA:=__disabled__} + provider_model_id: ${env.OLLAMA_SAFETY_MODEL:=__disabled__} + model_type: llm +- metadata: {} + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-sonnet-latest + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/claude-3-5-sonnet-latest model_type: llm - metadata: {} - model_id: anthropic/claude-3-7-sonnet-latest - provider_id: anthropic + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-7-sonnet-latest + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/claude-3-7-sonnet-latest model_type: llm - metadata: {} - model_id: anthropic/claude-3-5-haiku-latest - provider_id: anthropic + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/claude-3-5-haiku-latest + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/claude-3-5-haiku-latest model_type: llm - metadata: embedding_dimension: 1024 context_length: 32000 - model_id: anthropic/voyage-3 - provider_id: anthropic + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3 + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/voyage-3 model_type: embedding - metadata: embedding_dimension: 512 context_length: 32000 - model_id: anthropic/voyage-3-lite - provider_id: anthropic + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-3-lite + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/voyage-3-lite model_type: embedding - metadata: embedding_dimension: 1024 context_length: 32000 - model_id: anthropic/voyage-code-3 - provider_id: anthropic + model_id: ${env.ENABLE_ANTHROPIC:=__disabled__}/anthropic/voyage-code-3 + provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__} provider_model_id: anthropic/voyage-code-3 model_type: embedding - metadata: {} - model_id: gemini/gemini-1.5-flash - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-flash + provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_model_id: gemini/gemini-1.5-flash model_type: llm - metadata: {} - model_id: gemini/gemini-1.5-pro - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-1.5-pro + provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_model_id: gemini/gemini-1.5-pro model_type: llm - metadata: {} - model_id: gemini/gemini-2.0-flash - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.0-flash + provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_model_id: gemini/gemini-2.0-flash model_type: llm - metadata: {} - model_id: gemini/gemini-2.5-flash - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-flash + provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_model_id: gemini/gemini-2.5-flash model_type: llm - metadata: {} - model_id: gemini/gemini-2.5-pro - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/gemini-2.5-pro + provider_id: ${env.ENABLE_GEMINI:=__disabled__} provider_model_id: gemini/gemini-2.5-pro model_type: llm - metadata: embedding_dimension: 768 context_length: 2048 - model_id: gemini/text-embedding-004 - provider_id: gemini + model_id: ${env.ENABLE_GEMINI:=__disabled__}/gemini/text-embedding-004 + provider_id: ${env.ENABLE_GEMINI:=__disabled__} 
provider_model_id: gemini/text-embedding-004 model_type: embedding - metadata: {} - model_id: groq/llama3-8b-8192 - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-8b-8192 + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-3.1-8B-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} - model_id: groq/llama-3.1-8b-instant - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.1-8b-instant + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-3.1-8b-instant model_type: llm - metadata: {} - model_id: groq/llama3-70b-8192 - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama3-70b-8192 + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-3-70B-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3-70B-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama3-70b-8192 model_type: llm - metadata: {} - model_id: groq/llama-3.3-70b-versatile - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.3-70b-versatile + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-3.3-70B-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-3.3-70b-versatile model_type: llm - metadata: {} - model_id: groq/llama-3.2-3b-preview - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-3.2-3b-preview + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-3.2-3B-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-3.2-3b-preview model_type: llm - metadata: {} - model_id: groq/llama-4-scout-17b-16e-instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-scout-17b-16e-instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: 
groq/meta-llama/llama-4-scout-17b-16e-instruct model_type: llm - metadata: {} - model_id: groq/llama-4-maverick-17b-128e-instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/llama-4-maverick-17b-128e-instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: groq + model_id: ${env.ENABLE_GROQ:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_GROQ:=__disabled__} provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-3.1-8B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-8B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.1-8B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-8B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-3.1-405B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.1-405B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-3.2-1B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-1B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.2-1B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-1B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-3.2-3B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.2-3B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.2-3B-Instruct - provider_id: sambanova + 
model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-3B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-3.3-70B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-3.3-70B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.3-70B-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.3-70B-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct model_type: llm - metadata: {} - model_id: sambanova/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-11B-Vision-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct model_type: llm - metadata: {} - model_id: sambanova/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-3.2-90B-Vision-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct model_type: llm - metadata: {} - model_id: sambanova/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct model_type: llm - metadata: {} - model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct model_type: llm - metadata: {} - model_id: sambanova/Meta-Llama-Guard-3-8B - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/sambanova/Meta-Llama-Guard-3-8B + 
provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: sambanova/meta-llama/Llama-Guard-3-8B - provider_id: sambanova + model_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/meta-llama/Llama-Guard-3-8B + provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__} provider_model_id: sambanova/Meta-Llama-Guard-3-8B model_type: llm - metadata: {} - model_id: vllm/${env.VLLM_INFERENCE_MODEL:=__disabled__} - provider_id: vllm + model_id: ${env.ENABLE_VLLM:=__disabled__}/${env.VLLM_INFERENCE_MODEL:=__disabled__} + provider_id: ${env.ENABLE_VLLM:=__disabled__} provider_model_id: ${env.VLLM_INFERENCE_MODEL:=__disabled__} model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers + provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B +shields: [] vector_dbs: [] datasets: [] scoring_fns: [] diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index c0f2646d7..bbeef6b72 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -9,13 +9,13 @@ from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( ModelInput, Provider, - ShieldInput, ToolGroupInput, ) from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) +from llama_stack.providers.inline.post_training.huggingface import HuggingFacePostTrainingConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.config import ( SQLiteVectorIOConfig, @@ -24,6 +24,7 @@ from llama_stack.providers.remote.inference.anthropic.config import AnthropicCon from llama_stack.providers.remote.inference.anthropic.models import ( MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.cerebras.config import CerebrasImplConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import ( MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES, @@ -36,15 +37,24 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.models import ( MODEL_ENTRIES as GROQ_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.llama_openai_compat.config import ( + LlamaCompatConfig, +) +from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import ( MODEL_ENTRIES as OPENAI_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.passthrough.config import ( + PassthroughImplConfig, +) from llama_stack.providers.remote.inference.sambanova.config import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import ( MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES, ) +from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig +from llama_stack.providers.remote.inference.tgi.config import InferenceAPIImplConfig, TGIImplConfig from 
llama_stack.providers.remote.inference.together.config import TogetherImplConfig from llama_stack.providers.remote.inference.together.models import ( MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES, @@ -54,6 +64,7 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC from llama_stack.providers.remote.vector_io.pgvector.config import ( PGVectorVectorIOConfig, ) +from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig from llama_stack.templates.template import ( @@ -67,21 +78,25 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo # in this template, we allow each API key to be optional providers = [ ( + "${env.ENABLE_OPENAI:=__disabled__}", "openai", OPENAI_MODEL_ENTRIES, OpenAIConfig.sample_run_config(api_key="${env.OPENAI_API_KEY:+}"), ), ( + "${env.ENABLE_FIREWORKS:=__disabled__}", "fireworks", FIREWORKS_MODEL_ENTRIES, FireworksImplConfig.sample_run_config(api_key="${env.FIREWORKS_API_KEY:+}"), ), ( + "${env.ENABLE_TOGETHER:=__disabled__}", "together", TOGETHER_MODEL_ENTRIES, TogetherImplConfig.sample_run_config(api_key="${env.TOGETHER_API_KEY:+}"), ), ( + "${env.ENABLE_OLLAMA:=__disabled__}", "ollama", [ ProviderModelEntry( @@ -95,32 +110,41 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:=384}", }, ), + ProviderModelEntry( + provider_model_id="${env.OLLAMA_SAFETY_MODEL:=__disabled__}", + model_type=ModelType.llm, + ), ], OllamaImplConfig.sample_run_config( url="${env.OLLAMA_URL:=http://localhost:11434}", raise_on_connect_error=False ), ), ( + "${env.ENABLE_ANTHROPIC:=__disabled__}", "anthropic", ANTHROPIC_MODEL_ENTRIES, AnthropicConfig.sample_run_config(api_key="${env.ANTHROPIC_API_KEY:+}"), ), ( + "${env.ENABLE_GEMINI:=__disabled__}", "gemini", GEMINI_MODEL_ENTRIES, GeminiConfig.sample_run_config(api_key="${env.GEMINI_API_KEY:+}"), ), ( + "${env.ENABLE_GROQ:=__disabled__}", "groq", GROQ_MODEL_ENTRIES, GroqConfig.sample_run_config(api_key="${env.GROQ_API_KEY:+}"), ), ( + "${env.ENABLE_SAMBANOVA:=__disabled__}", "sambanova", SAMBANOVA_MODEL_ENTRIES, SambaNovaImplConfig.sample_run_config(api_key="${env.SAMBANOVA_API_KEY:+}"), ), ( + "${env.ENABLE_VLLM:=__disabled__}", "vllm", [ ProviderModelEntry( @@ -132,14 +156,88 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo url="${env.VLLM_URL:=http://localhost:8000/v1}", ), ), + ( + "${env.ENABLE_TGI:=__disabled__}", + "tgi", + [], + TGIImplConfig.sample_run_config( + url="${env.TGI_URL:+}", + endpoint_name="${env.INFERENCE_ENDPOINT_NAME:+}", + ), + ), + # TODO: re-add once the Python 3.13 issue is fixed + # discussion: https://github.com/meta-llama/llama-stack/pull/2327#discussion_r2156883828 + # ( + # "watsonx", + # [], + # WatsonXConfig.sample_run_config(api_key="${env.WATSONX_API_KEY:}"), + # ), + ( + "${env.ENABLE_CEREBRAS:=__disabled__}", + "cerebras", + [], + CerebrasImplConfig.sample_run_config(api_key="${env.CEREBRAS_API_KEY:+}"), + ), + ( + "${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}", + "llama-openai-compat", + [], + LlamaCompatConfig.sample_run_config(api_key="${env.LLAMA_API_KEY:+:}"), + ), + ( + "${env.ENABLE_NVIDIA:=__disabled__}", + "nvidia", + [], + NVIDIAConfig.sample_run_config( + api_key="${env.NVIDIA_API_KEY:+}", + url="${env.NVIDIA_BASE_URL:__disabled__}", + ), + 
), + ( + "${env.ENABLE_HF_SERVERLESS:=__disabled__}", + "hf::serverless", + [], + InferenceAPIImplConfig.sample_run_config( + api_token="${env.HF_API_TOKEN:+:}", + repo="${env.INFERENCE_MODEL:+:}", + ), + ), + ( + "${env.ENABLE_HF_ENDPOINT:=__disabled__}", + "hf::endpoint", + [], + InferenceEndpointImplConfig.sample_run_config( + api_token="${env.HF_API_TOKEN:+:}", + endpoint_name="${env.INFERENCE_ENDPOINT_NAME:+:}", + ), + ), + ( + "${env.ENABLE_BEDROCK:=__disabled__}", + "bedrock", + [], + BedrockBaseConfig.sample_run_config( + aws_access_key_id="${env.AWS_ACCESS_KEY_ID:+}", + aws_secret_access_key="${env.AWS_SECRET_ACCESS_KEY:+}", + aws_session_token="${env.AWS_SESSION_TOKEN:+}", + region_name="${env.AWS_DEFAULT_REGION:+}", + ), + ), + ( + "${env.ENABLE_PASSTHROUGH:=__disabled__}", + "passthrough", + [], + PassthroughImplConfig.sample_run_config( + url="${env.PASSTHROUGH_URL:+:}", api_key="${env.PASSTHROUGH_API_KEY:+:}" + ), + ), ] inference_providers = [] available_models = {} - for provider_id, model_entries, config in providers: + for provider_id, provider_type, model_entries, config in providers: inference_providers.append( Provider( provider_id=provider_id, - provider_type=f"remote::{provider_id}", + provider_type=f"remote::{provider_type}", config=config, ) ) @@ -156,6 +254,7 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "post_training": ["inline::huggingface"], "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], @@ -170,22 +269,22 @@ def get_distribution_template() -> DistributionTemplate: vector_io_providers = [ Provider( - provider_id="faiss", + provider_id="${env.ENABLE_FAISS:=faiss}", provider_type="inline::faiss", config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_SQLITE_VEC:+sqlite-vec}", + provider_id="${env.ENABLE_SQLITE_VEC:=__disabled__}", provider_type="inline::sqlite-vec", config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), ), Provider( - provider_id="${env.ENABLE_CHROMADB:+chromadb}", + provider_id="${env.ENABLE_CHROMADB:=__disabled__}", provider_type="remote::chromadb", config=ChromaVectorIOConfig.sample_run_config(url="${env.CHROMADB_URL:+}"), ), Provider( - provider_id="${env.ENABLE_PGVECTOR:+pgvector}", + provider_id="${env.ENABLE_PGVECTOR:=__disabled__}", provider_type="remote::pgvector", config=PGVectorVectorIOConfig.sample_run_config( db="${env.PGVECTOR_DB:+}", @@ -200,11 +299,15 @@ def get_distribution_template() -> DistributionTemplate: config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), ) embedding_provider = Provider( - provider_id="sentence-transformers", + provider_id="${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}", provider_type="inline::sentence-transformers", config=SentenceTransformersInferenceConfig.sample_run_config(), ) - + post_training_provider = Provider( + provider_id="huggingface", + provider_type="inline::huggingface", + config=HuggingFacePostTrainingConfig.sample_run_config(f"~/.llama/distributions/{name}"), + ) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -242,10 +345,14 @@ def get_distribution_template() -> DistributionTemplate: "inference": inference_providers + [embedding_provider], "vector_io": vector_io_providers, 
"files": [files_provider], + "post_training": [post_training_provider], }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + # TODO: add a way to enable/disable shields on the fly + # default_shields=[ + # ShieldInput(provider_id="llama-guard", shield_id="${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-8B}") + # ], ), }, run_config_env_vars={ diff --git a/llama_stack/templates/tgi/__init__.py b/llama_stack/templates/tgi/__init__.py deleted file mode 100644 index fa1932f6a..000000000 --- a/llama_stack/templates/tgi/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .tgi import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml deleted file mode 100644 index 3ac3968e8..000000000 --- a/llama_stack/templates/tgi/build.yaml +++ /dev/null @@ -1,35 +0,0 @@ -version: 2 -distribution_spec: - description: Use (an external) TGI server for running LLM inference - providers: - inference: - - remote::tgi - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md deleted file mode 100644 index 68b475893..000000000 --- a/llama_stack/templates/tgi/doc_template.md +++ /dev/null @@ -1,137 +0,0 @@ ---- -orphan: true ---- - -# TGI Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. - -{{ providers_table }} - -You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference. - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - - -## Setting up TGI server - -Please check the [TGI Getting Started Guide](https://github.com/huggingface/text-generation-inference?tab=readme-ov-file#get-started) to get a TGI endpoint. 
Here is a sample script to start a TGI server locally via Docker: - -```bash -export INFERENCE_PORT=8080 -export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct -export CUDA_VISIBLE_DEVICES=0 - -docker run --rm -it \ - --pull always \ - -v $HOME/.cache/huggingface:/data \ - -p $INFERENCE_PORT:$INFERENCE_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference:2.3.1 \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --cuda-memory-fraction 0.7 \ - --model-id $INFERENCE_MODEL \ - --port $INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, then you will need to also run another instance of a TGI with a corresponding safety model like `meta-llama/Llama-Guard-3-1B` using a script like: - -```bash -export SAFETY_PORT=8081 -export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B -export CUDA_VISIBLE_DEVICES=1 - -docker run --rm -it \ - --pull always \ - -v $HOME/.cache/huggingface:/data \ - -p $SAFETY_PORT:$SAFETY_PORT \ - --gpus $CUDA_VISIBLE_DEVICES \ - ghcr.io/huggingface/text-generation-inference:2.3.1 \ - --dtype bfloat16 \ - --usage-stats off \ - --sharded false \ - --model-id $SAFETY_MODEL \ - --port $SAFETY_PORT -``` - -## Running Llama Stack - -Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -# You need a local checkout of llama-stack to run this, get it using -# git clone https://github.com/meta-llama/llama-stack.git -cd /path/to/llama-stack - -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://host.docker.internal:$INFERENCE_PORT \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env TGI_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT -``` - -### Via Conda - -Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. 
- -```bash -llama stack build --template {{ name }} --image-type conda -llama stack run ./run.yaml - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://127.0.0.1:$INFERENCE_PORT -``` - -If you are using Llama Stack Safety / Shield APIs, use: - -```bash -llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env TGI_URL=http://127.0.0.1:$INFERENCE_PORT \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env TGI_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT -``` diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml deleted file mode 100644 index 63da62a03..000000000 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ /dev/null @@ -1,132 +0,0 @@ -version: 2 -image_name: tgi -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi-inference - provider_type: remote::tgi - config: - url: ${env.TGI_URL} - - provider_id: tgi-safety - provider_type: remote::tgi - config: - url: ${env.TGI_SAFETY_URL} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: sqlite - db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi-inference - model_type: llm -- metadata: {} - model_id: ${env.SAFETY_MODEL} - provider_id: tgi-safety - model_type: llm -shields: -- shield_id: ${env.SAFETY_MODEL} -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml deleted file mode 100644 index 430494121..000000000 --- a/llama_stack/templates/tgi/run.yaml +++ /dev/null @@ -1,131 +0,0 @@ -version: 2 -image_name: tgi -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: tgi-inference - provider_type: remote::tgi - config: - url: ${env.TGI_URL} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} -metadata_store: - type: 
sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: tgi-inference - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -server: - port: 8321 diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py deleted file mode 100644 index 394cde18e..000000000 --- a/llama_stack/templates/tgi/tgi.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.tgi import TGIImplConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::tgi", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - ], - } - name = "tgi" - inference_provider = Provider( - provider_id="tgi-inference", - provider_type="remote::tgi", - config=TGIImplConfig.sample_run_config( - url="${env.TGI_URL}", - ), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - - inference_model = ModelInput( - model_id="${env.INFERENCE_MODEL}", - provider_id="tgi-inference", - ) - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - safety_model = ModelInput( - model_id="${env.SAFETY_MODEL}", - provider_id="tgi-safety", - ) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ] - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use (an external) TGI server for running 
LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=[inference_model, embedding_model], - default_tool_groups=default_tool_groups, - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - Provider( - provider_id="tgi-safety", - provider_type="remote::tgi", - config=TGIImplConfig.sample_run_config( - url="${env.TGI_SAFETY_URL}", - ), - ), - ], - "vector_io": [vector_io_provider], - }, - default_models=[ - inference_model, - safety_model, - ], - default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "INFERENCE_MODEL": ( - "meta-llama/Llama-3.2-3B-Instruct", - "Inference model loaded into the TGI server", - ), - "TGI_URL": ( - "http://127.0.0.1:8080/v1", - "URL of the TGI server with the main inference model", - ), - "TGI_SAFETY_URL": ( - "http://127.0.0.1:8081/v1", - "URL of the TGI server with the safety model", - ), - "SAFETY_MODEL": ( - "meta-llama/Llama-Guard-3-1B", - "Name of the safety (Llama-Guard) model to use", - ), - }, - ) diff --git a/llama_stack/templates/together/__init__.py b/llama_stack/templates/together/__init__.py deleted file mode 100644 index 757995b6b..000000000 --- a/llama_stack/templates/together/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .together import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml deleted file mode 100644 index 518a843da..000000000 --- a/llama_stack/templates/together/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -version: 2 -distribution_spec: - description: Use Together.AI for running LLM inference - providers: - inference: - - remote::together - - inline::sentence-transformers - vector_io: - - inline::faiss - - remote::chromadb - - remote::pgvector - safety: - - inline::llama-guard - agents: - - inline::meta-reference - telemetry: - - inline::meta-reference - eval: - - inline::meta-reference - datasetio: - - remote::huggingface - - inline::localfs - scoring: - - inline::basic - - inline::llm-as-judge - - inline::braintrust - tool_runtime: - - remote::brave-search - - remote::tavily-search - - inline::rag-runtime - - remote::model-context-protocol - - remote::wolfram-alpha -image_type: conda -additional_pip_packages: -- aiosqlite -- sqlalchemy[asyncio] diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md deleted file mode 100644 index 5a01595c4..000000000 --- a/llama_stack/templates/together/doc_template.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -orphan: true ---- -# Together Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. 
- -{{ providers_table }} - -{% if run_config_env_vars %} -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a Together API Key. You can get one by visiting [together.xyz](https://together.xyz/). - - -## Running Llama Stack with Together - -You can do this via Conda (build code) or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=8321 -docker run \ - -it \ - --pull always \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env TOGETHER_API_KEY=$TOGETHER_API_KEY -``` - -### Via Conda - -```bash -llama stack build --template {{ name }} --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env TOGETHER_API_KEY=$TOGETHER_API_KEY -``` diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml deleted file mode 100644 index 7ae2a1d1a..000000000 --- a/llama_stack/templates/together/run-with-safety.yaml +++ /dev/null @@ -1,279 +0,0 @@ -version: 2 -image_name: together -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:+} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - - provider_id: llama-guard-vision - provider_type: inline::llama-guard - config: {} - - provider_id: code-scanner - provider_type: inline::code-scanner - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - 
- provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: together - provider_model_id: 
meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-Guard-3-8B - provider_id: together - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: together - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: together - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: togethercomputer/m2-bert-80M-8k-retrieval - provider_id: together - provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 32768 - model_id: togethercomputer/m2-bert-80M-32k-retrieval - provider_id: together - provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval - model_type: embedding -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B - provider_id: llama-guard -- shield_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: llama-guard-vision -- shield_id: CodeScanner - provider_id: code-scanner -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml deleted file mode 100644 index dc09aeac9..000000000 --- a/llama_stack/templates/together/run.yaml +++ /dev/null @@ -1,269 +0,0 @@ -version: 2 -image_name: together -apis: -- agents -- datasetio -- eval -- inference -- safety -- scoring -- telemetry -- tool_runtime -- vector_io -providers: - inference: - - provider_id: together - provider_type: remote::together - config: - 
url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:+} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db - eval: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db - datasetio: - - provider_id: huggingface - provider_type: remote::huggingface - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - - provider_id: localfs - provider_type: inline::localfs - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db - scoring: - - provider_id: basic - provider_type: inline::basic - config: {} - - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - config: {} - - provider_id: braintrust - provider_type: inline::braintrust - config: - openai_api_key: ${env.OPENAI_API_KEY:+} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db -inference_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db -models: -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-70B-Instruct - provider_id: together - provider_model_id: 
meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-3B-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-11B-Vision-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-90B-Vision-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.3-70B-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Meta-Llama-Guard-3-8B - provider_id: together - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-8B - provider_id: together - provider_model_id: meta-llama/Meta-Llama-Guard-3-8B - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-Guard-3-11B-Vision - provider_id: together - provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo - model_type: llm -- metadata: - embedding_dimension: 768 - context_length: 8192 - model_id: togethercomputer/m2-bert-80M-8k-retrieval - provider_id: together - provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval - model_type: embedding -- metadata: - embedding_dimension: 768 - context_length: 32768 - model_id: togethercomputer/m2-bert-80M-32k-retrieval - provider_id: together - provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval - model_type: embedding -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - 
provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding -shields: -- shield_id: meta-llama/Llama-Guard-3-8B -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py deleted file mode 100644 index 4c64ff3cd..000000000 --- a/llama_stack/templates/together/together.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from pathlib import Path - -from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( - ModelInput, - Provider, - ShieldInput, - ToolGroupInput, -) -from llama_stack.providers.inline.inference.sentence_transformers import ( - SentenceTransformersInferenceConfig, -) -from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig -from llama_stack.providers.remote.inference.together import TogetherImplConfig -from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES -from llama_stack.templates.template import ( - DistributionTemplate, - RunConfigSettings, - get_model_registry, -) - - -def get_distribution_template() -> DistributionTemplate: - providers = { - "inference": ["remote::together", "inline::sentence-transformers"], - "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], - "agents": ["inline::meta-reference"], - "telemetry": ["inline::meta-reference"], - "eval": ["inline::meta-reference"], - "datasetio": ["remote::huggingface", "inline::localfs"], - "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], - "tool_runtime": [ - "remote::brave-search", - "remote::tavily-search", - "inline::rag-runtime", - "remote::model-context-protocol", - "remote::wolfram-alpha", - ], - } - name = "together" - inference_provider = Provider( - provider_id="together", - provider_type="remote::together", - config=TogetherImplConfig.sample_run_config(), - ) - vector_io_provider = Provider( - provider_id="faiss", - provider_type="inline::faiss", - config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"), - ) - embedding_provider = Provider( - provider_id="sentence-transformers", - provider_type="inline::sentence-transformers", - config=SentenceTransformersInferenceConfig.sample_run_config(), - ) - available_models = { - "together": MODEL_ENTRIES, - } - default_models = get_model_registry(available_models) - default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), - ToolGroupInput( - 
toolgroup_id="builtin::rag", - provider_id="rag-runtime", - ), - ToolGroupInput( - toolgroup_id="builtin::wolfram_alpha", - provider_id="wolfram-alpha", - ), - ] - embedding_model = ModelInput( - model_id="all-MiniLM-L6-v2", - provider_id="sentence-transformers", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 384, - }, - ) - - return DistributionTemplate( - name=name, - distro_type="self_hosted", - description="Use Together.AI for running LLM inference", - container_image=None, - template_path=Path(__file__).parent / "doc_template.md", - providers=providers, - available_models_by_provider=available_models, - run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides={ - "inference": [inference_provider, embedding_provider], - "vector_io": [vector_io_provider], - }, - default_models=default_models + [embedding_model], - default_tool_groups=default_tool_groups, - default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], - ), - "run-with-safety.yaml": RunConfigSettings( - provider_overrides={ - "inference": [ - inference_provider, - embedding_provider, - ], - "vector_io": [vector_io_provider], - "safety": [ - Provider( - provider_id="llama-guard", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="llama-guard-vision", - provider_type="inline::llama-guard", - config={}, - ), - Provider( - provider_id="code-scanner", - provider_type="inline::code-scanner", - config={}, - ), - ], - }, - default_models=[ - *default_models, - embedding_model, - ], - default_shields=[ - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-8B", - provider_id="llama-guard", - ), - ShieldInput( - shield_id="meta-llama/Llama-Guard-3-11B-Vision", - provider_id="llama-guard-vision", - ), - ShieldInput( - shield_id="CodeScanner", - provider_id="code-scanner", - ), - ], - default_tool_groups=default_tool_groups, - ), - }, - run_config_env_vars={ - "LLAMA_STACK_PORT": ( - "8321", - "Port for the Llama Stack distribution server", - ), - "TOGETHER_API_KEY": ( - "", - "Together.AI API Key", - ), - }, - ) diff --git a/llama_stack/templates/watsonx/__init__.py b/llama_stack/templates/watsonx/__init__.py index 078d86144..756f351d8 100644 --- a/llama_stack/templates/watsonx/__init__.py +++ b/llama_stack/templates/watsonx/__init__.py @@ -3,5 +3,3 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -from .watsonx import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/watsonx/doc_template.md b/llama_stack/templates/watsonx/doc_template.md deleted file mode 100644 index f28dbf0bf..000000000 --- a/llama_stack/templates/watsonx/doc_template.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -orphan: true ---- -# watsonx Distribution - -```{toctree} -:maxdepth: 2 -:hidden: - -self -``` - -The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. 
- -{{ providers_table }} - -{% if run_config_env_vars %} - -### Environment Variables - -The following environment variables can be configured: - -{% for var, (default_value, description) in run_config_env_vars.items() %} -- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) -{% endfor %} -{% endif %} - -{% if default_models %} -### Models - -The following models are available by default: - -{% for model in default_models %} -- `{{ model.model_id }} {{ model.doc_string }}` -{% endfor %} -{% endif %} - - -### Prerequisite: API Keys - -Make sure you have access to a watsonx API Key. You can get one by referring [watsonx.ai](https://www.ibm.com/docs/en/masv-and-l/maximo-manage/continuous-delivery?topic=setup-create-watsonx-api-key). - - -## Running Llama Stack with watsonx - -You can do this via Conda (build code), venv or Docker which has a pre-built image. - -### Via Docker - -This method allows you to get started quickly without having to build the distribution code. - -```bash -LLAMA_STACK_PORT=5001 -docker run \ - -it \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ./run.yaml:/root/my-run.yaml \ - llamastack/distribution-{{ name }} \ - --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env WATSONX_API_KEY=$WATSONX_API_KEY \ - --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL -``` - -### Via Conda - -```bash -llama stack build --template watsonx --image-type conda -llama stack run ./run.yaml \ - --port $LLAMA_STACK_PORT \ - --env WATSONX_API_KEY=$WATSONX_API_KEY \ - --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID -``` diff --git a/tests/integration/README.md b/tests/integration/README.md index 31d58c83f..3d3aa3d77 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -11,7 +11,7 @@ pytest --help Here are the most important options: - `--stack-config`: specify the stack config to use. You have three ways to point to a stack: - a URL which points to a Llama Stack distribution server - - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file + - a template (e.g., `starter`) or a path to a `run.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - `--env`: set environment variables, e.g. --env KEY=value. this is a utility option to set environment variables required by various providers. 
@@ -32,28 +32,29 @@ Experimental, under development, options: ## Examples -Run all text inference tests with the `together` distribution: +Run all text inference tests with the `starter` distribution using the `together` provider: ```bash -pytest -s -v tests/integration/inference/test_text_inference.py \ - --stack-config=together \ +ENABLE_TOGETHER=together pytest -s -v tests/integration/inference/test_text_inference.py \ + --stack-config=starter \ --text-model=meta-llama/Llama-3.1-8B-Instruct ``` -Run all text inference tests with the `together` distribution and `meta-llama/Llama-3.1-8B-Instruct`: +Run all text inference tests with the `starter` distribution using the `together` provider and `meta-llama/Llama-3.1-8B-Instruct`: ```bash -pytest -s -v tests/integration/inference/test_text_inference.py \ - --stack-config=together \ +ENABLE_TOGETHER=together pytest -s -v tests/integration/inference/test_text_inference.py \ + --stack-config=starter \ --text-model=meta-llama/Llama-3.1-8B-Instruct ``` -Running all inference tests for a number of models: +Running all inference tests for a number of models using the `together` provider: ```bash TEXT_MODELS=meta-llama/Llama-3.1-8B-Instruct,meta-llama/Llama-3.1-70B-Instruct VISION_MODELS=meta-llama/Llama-3.2-11B-Vision-Instruct EMBEDDING_MODELS=all-MiniLM-L6-v2 +ENABLE_TOGETHER=together export TOGETHER_API_KEY= pytest -s -v tests/integration/inference/ \ diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index fa96688c0..daf80059c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -65,7 +65,7 @@ def pytest_addoption(parser): help=textwrap.dedent( """ a 'pointer' to the stack. this can be either be: - (a) a template name like `fireworks`, or + (a) a template name like `starter`, or (b) a path to a run.yaml file, or (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference` """ diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 8b6b3ddbe..9a734e8a5 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -7,6 +7,7 @@ import inspect import os import tempfile +from urllib.parse import urlparse import pytest import yaml @@ -122,12 +123,17 @@ def llama_stack_client(request, provider_data): if not config: raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG") - # check if this looks like a URL - if config.startswith("http") or "//" in config: - return LlamaStackClient( - base_url=config, - provider_data=provider_data, - ) + # check if this looks like a URL using proper URL parsing + try: + parsed_url = urlparse(config) + if parsed_url.scheme and parsed_url.netloc: + return LlamaStackClient( + base_url=config, + provider_data=provider_data, + ) + except Exception: + # If URL parsing fails, treat as non-URL config + pass if "=" in config: run_config = run_config_from_adhoc_config_spec(config) diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 3e43af272..05aee5096 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -45,7 +45,7 @@ def skip_if_model_doesnt_support_suffix(client_with_models, model_id): # To test `fim` ( fill in the middle ) completion, we need to use a model that supports suffix. # Use this to specifically test this API functionality. 
- # pytest -sv --stack-config="inference=ollama" \ + # pytest -sv --stack-config="inference=starter" \ # tests/integration/inference/test_openai_completion.py \ # --text-model qwen2.5-coder:1.5b \ # -k test_openai_completion_non_streaming_suffix
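---

A minimal, self-contained sketch of the URL detection added to `tests/integration/fixtures/common.py` in the hunk above: a `--stack-config` value is routed to a remote `LlamaStackClient` only when `urlparse` yields both a scheme and a network location, so template names like `starter` and adhoc `api=provider` specs fall through to the library-client path. This is an illustration of the check, not part of the diff; the helper name and the sample strings are hypothetical.

```python
from urllib.parse import urlparse


def looks_like_server_url(config: str) -> bool:
    """Mirror of the check in llama_stack_client(): treat the config as a
    server URL only if it parses with both a scheme and a netloc."""
    try:
        parsed = urlparse(config)
        return bool(parsed.scheme and parsed.netloc)
    except Exception:
        # If parsing fails, fall back to treating it as a non-URL config.
        return False


# Illustrative inputs only: a server URL, a template name, and an adhoc spec.
for cfg in ("http://localhost:8321", "starter", "inference=fireworks,safety=llama-guard"):
    print(f"{cfg!r} -> server URL: {looks_like_server_url(cfg)}")
```

Only the first string has both a scheme (`http`) and a netloc (`localhost:8321`), so only it is sent to a remote client; the template name and the `api=provider` spec are handled by the subsequent branches in the fixture.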