diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index e7506537f..36426e862 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -222,6 +222,40 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "meta-reference-quantized-gpu": [
+    "accelerate",
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "fairscale",
+    "faiss-cpu",
+    "fastapi",
+    "fbgemm-gpu",
+    "fire",
+    "httpx",
+    "lm-format-enforcer",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "torch",
+    "torchao==0.5.0",
+    "torchvision",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "zmq",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 1b88b01cc..8bb9d8fc5 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -1,4 +1,5 @@
 # Bedrock Distribution
+
 ```{toctree}
 :maxdepth: 2
 :hidden:
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 7dcc642d5..b5b52c1f4 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -7,55 +7,86 @@
 self
 ```
 
-The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists of the following provider configurations.
+The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists of the following provider configurations:
+
+| API | Provider(s) |
+|-----|-------------|
+| agents | `inline::meta-reference` |
+| inference | `inline::meta-reference-quantized` |
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
+| safety | `inline::llama-guard` |
+| telemetry | `inline::meta-reference` |
 
-| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
-|----------------- |------------------------ |---------------- |-------------------------------------------------- |---------------- |---------------- |
-| **Provider(s)** | meta-reference-quantized | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference |
 
 The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
 
-### Step 0. Prerequisite - Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) here to download the models.
+Note that you need access to NVIDIA GPUs to run this distribution; it is not compatible with CPU-only machines or with AMD GPUs.
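+
+If you want to sanity-check GPU visibility before starting the server, one quick option (illustrative only; this tool ships with any recent NVIDIA driver and is not part of this distribution) is:
+
+```bash
+# Illustrative pre-flight check, not part of this distribution:
+# list the NVIDIA GPUs the driver can see, with their total memory.
+nvidia-smi --query-gpu=name,memory.total --format=csv
+```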
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
+- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
+
+
+## Prerequisite: Downloading Models
+
+Please make sure you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) to download the models.
+Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
 $ ls ~/.llama/checkpoints
-Llama3.2-3B-Instruct:int4-qlora-eo8
+Llama3.1-8B  Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
+Llama3.1-8B-Instruct  Llama3.2-1B  Llama3.2-3B-Instruct  Llama-Guard-3-1B  Prompt-Guard-86M
 ```
 
-### Step 1. Start the Distribution
-#### (Option 1) Start with Docker
-```
-$ cd distributions/meta-reference-quantized-gpu && docker compose up
+## Running the Distribution
+
+You can run the distribution via Conda (build the code yourself) or via Docker, which uses a pre-built image.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-meta-reference-quantized-gpu \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 ```
-> [!NOTE]
-> This assumes you have access to GPU to start a local server with access to your GPU.
+If you are using Llama Stack Safety / Shield APIs, use:
-
-> [!NOTE]
-> `~/.llama` should be the path containing downloaded weights of Llama models.
-
-
-This will download and start running a pre-built docker container. Alternatively, you may use the following commands:
-
-```
-docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-quantized-gpu --yaml_config /root/my-run.yaml
+```bash
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-meta-reference-quantized-gpu \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 ```
 
-#### (Option 2) Start with Conda
+### Via Conda
 
-1. Install the `llama` CLI. See [CLI Reference](https://llama-stack.readthedocs.io/en/latest/cli_reference/index.html)
+Make sure you have run `pip install llama-stack` and that the Llama Stack CLI is available.
 
-2. Build the `meta-reference-quantized-gpu` distribution
-
-```
-$ llama stack build --template meta-reference-quantized-gpu --image-type conda
+```bash
+llama stack build --template meta-reference-quantized-gpu --image-type conda
+llama stack run distributions/meta-reference-quantized-gpu/run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 ```
 
-3. Start running distribution
-```
-$ cd distributions/meta-reference-quantized-gpu
-$ llama stack run ./run.yaml
+If you are using Llama Stack Safety / Shield APIs, use:
+
+```bash
+llama stack run distributions/meta-reference-quantized-gpu/run-with-safety.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 ```
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 235cc1e0f..abebe5929 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,5 +1,4 @@
 # Remote vLLM Distribution
-
 ```{toctree}
 :maxdepth: 2
 :hidden:
diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/llama_stack/providers/inline/inference/meta_reference/config.py
index 4713e7f99..04058d55d 100644
--- a/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from llama_models.datatypes import *  # noqa: F403
 from llama_models.sku_list import resolve_model
@@ -56,6 +56,7 @@ class MetaReferenceInferenceConfig(BaseModel):
         cls,
         model: str = "Llama3.2-3B-Instruct",
         checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
+        **kwargs,
     ) -> Dict[str, Any]:
         return {
             "model": model,
@@ -66,3 +67,16 @@
 
 class MetaReferenceQuantizedInferenceConfig(MetaReferenceInferenceConfig):
     quantization: QuantizationConfig
+
+    @classmethod
+    def sample_run_config(
+        cls,
+        model: str = "Llama3.2-3B-Instruct",
+        checkpoint_dir: str = "${env.CHECKPOINT_DIR:null}",
+        **kwargs,
+    ) -> Dict[str, Any]:
+        config = super().sample_run_config(model, checkpoint_dir, **kwargs)
+        config["quantization"] = {
+            "type": "fp8",
+        }
+        return config
diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py
index 84bf9af2a..90f0dac93 100644
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -50,7 +50,7 @@ def process_template(template_dir: Path, progress) -> None:
         template.save_distribution(
             yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
             doc_output_dir=REPO_ROOT
-            / "docs/source/getting_started/distributions"
+            / "docs/source/distributions"
             / f"{template.distro_type}_distro",
         )
     else:
diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md
index 9331382b6..2121719b7 100644
--- a/llama_stack/templates/bedrock/doc_template.md
+++ b/llama_stack/templates/bedrock/doc_template.md
@@ -1,5 +1,12 @@
 # Bedrock Distribution
 
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
 
 {{ providers_table }}
diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md
index 2f4be574d..1b072d277 100644
--- a/llama_stack/templates/fireworks/doc_template.md
+++ b/llama_stack/templates/fireworks/doc_template.md
@@ -1,5 +1,12 @@
 # Fireworks Distribution
 
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
 
 {{ providers_table }}
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index de09efdb0..66debfb1f 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -1,5 +1,12 @@
 # Meta Reference Distribution
 
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
 
 {{ providers_table }}
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
index a22490b5e..961864dac 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
+++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml
@@ -1,13 +1,19 @@
+version: '2'
 name: meta-reference-quantized-gpu
 distribution_spec:
-  docker_image: pytorch/pytorch:2.5.0-cuda12.4-cudnn9-runtime
-  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  description: Use Meta Reference with fp8, int4 quantization for running LLM inference
+  docker_image: null
   providers:
-    inference: meta-reference-quantized
+    inference:
+    - inline::meta-reference-quantized
     memory:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector
-    safety: inline::llama-guard
-    agents: inline::meta-reference
-    telemetry: inline::meta-reference
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+image_type: conda
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index afe1e3e20..60c64c222 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -1,54 +1,87 @@
 # Meta Reference Quantized Distribution
 
-The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists of the following provider configurations.
+```{toctree}
+:maxdepth: 2
+:hidden:
+self
+```
 
-| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
-|----------------- |------------------------ |---------------- |-------------------------------------------------- |---------------- |---------------- |
-| **Provider(s)** | meta-reference-quantized | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference |
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
+
+{{ providers_table }}
 
 The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
 
-### Step 0. Prerequisite - Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) here to download the models.
+Note that you need access to NVIDIA GPUs to run this distribution; it is not compatible with CPU-only machines or with AMD GPUs.
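+
+When running via Docker, your container runtime must expose the GPUs and the downloaded checkpoints to the container. Depending on your setup, you may need to add the `--gpus=all` and `-v ~/.llama:/root/.llama` flags (carried over from the earlier version of this guide) to the `docker run` commands shown below; treat the following as a sketch to adapt, not a guaranteed invocation:
+
+```bash
+# Illustrative only: GPU and checkpoint-mount flags from the previous
+# instructions, combined with the new image and arguments.
+docker run --gpus=all -v ~/.llama:/root/.llama -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+```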
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+
+## Prerequisite: Downloading Models
+
+Please make sure you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/cli_reference/download_models.html) to download the models.
+Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
 $ ls ~/.llama/checkpoints
-Llama3.2-3B-Instruct:int4-qlora-eo8
+Llama3.1-8B  Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
+Llama3.1-8B-Instruct  Llama3.2-1B  Llama3.2-3B-Instruct  Llama-Guard-3-1B  Prompt-Guard-86M
 ```
 
-### Step 1. Start the Distribution
-#### (Option 1) Start with Docker
-```
-$ cd distributions/meta-reference-quantized-gpu && docker compose up
+## Running the Distribution
+
+You can run the distribution via Conda (build the code yourself) or via Docker, which uses a pre-built image.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 ```
-> [!NOTE]
-> This assumes you have access to GPU to start a local server with access to your GPU.
+If you are using Llama Stack Safety / Shield APIs, use:
-
-> [!NOTE]
-> `~/.llama` should be the path containing downloaded weights of Llama models.
-
-
-This will download and start running a pre-built docker container. Alternatively, you may use the following commands:
-
-```
-docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-quantized-gpu --yaml_config /root/my-run.yaml
+```bash
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 ```
 
-#### (Option 2) Start with Conda
+### Via Conda
 
-1. Install the `llama` CLI. See [CLI Reference](https://llama-stack.readthedocs.io/en/latest/cli_reference/index.html)
+Make sure you have run `pip install llama-stack` and that the Llama Stack CLI is available.
 
-2. Build the `meta-reference-quantized-gpu` distribution
-
-```
-$ llama stack build --template meta-reference-quantized-gpu --image-type conda
+```bash
+llama stack build --template {{ name }} --image-type conda
+llama stack run distributions/{{ name }}/run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 ```
 
-3. Start running distribution
-```
-$ cd distributions/meta-reference-quantized-gpu
-$ llama stack run ./run.yaml
+If you are using Llama Stack Safety / Shield APIs, use:
+
+```bash
+llama stack run distributions/{{ name }}/run-with-safety.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 ```
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index f254bc920..1ff5d31d6 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -6,16 +6,16 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
-    MetaReferenceInferenceConfig,
+    MetaReferenceQuantizedInferenceConfig,
 )
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
-        "inference": ["inline::meta-reference"],
+        "inference": ["inline::meta-reference-quantized"],
         "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -24,8 +24,8 @@ def get_distribution_template() -> DistributionTemplate:
 
     inference_provider = Provider(
         provider_id="meta-reference-inference",
-        provider_type="inline::meta-reference",
-        config=MetaReferenceInferenceConfig.sample_run_config(
+        provider_type="inline::meta-reference-quantized",
+        config=MetaReferenceQuantizedInferenceConfig.sample_run_config(
            model="${env.INFERENCE_MODEL}",
            checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
        ),
@@ -35,18 +35,13 @@ def get_distribution_template() -> DistributionTemplate:
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
-    safety_model = ModelInput(
-        model_id="${env.SAFETY_MODEL}",
-        provider_id="meta-reference-safety",
-    )
-
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name="meta-reference-quantized-gpu",
         distro_type="self_hosted",
-        description="Use Meta Reference for running LLM inference",
+        description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
+        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -54,26 +49,6 @@ def get_distribution_template() -> DistributionTemplate:
                 },
                 default_models=[inference_model],
             ),
-            "run-with-safety.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": [
-                        inference_provider,
-                        Provider(
-                            provider_id="meta-reference-safety",
-                            provider_type="inline::meta-reference",
-                            config=MetaReferenceInferenceConfig.sample_run_config(
-                                model="${env.SAFETY_MODEL}",
-                                checkpoint_dir="${env.SAFETY_CHECKPOINT_DIR:null}",
-                            ),
-                        ),
-                    ],
-                },
-                default_models=[
-                    inference_model,
-                    safety_model,
-                ],
-                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
-            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (
@@ -88,13 +63,5 @@ def get_distribution_template() -> DistributionTemplate:
             "null",
             "Directory containing the Meta Reference model checkpoint",
         ),
-        "SAFETY_MODEL": (
-            "meta-llama/Llama-Guard-3-1B",
-            "Name of the safety (Llama-Guard) model to use",
-        ),
-        "SAFETY_CHECKPOINT_DIR": (
-            "null",
-            "Directory containing the Llama-Guard model checkpoint",
-        ),
     },
 )
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
new file mode 100644
index 000000000..e1104b623
--- /dev/null
+++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml
@@ -0,0 +1,58 @@
+version: '2'
+image_name: meta-reference-quantized-gpu
+docker_image: null
+conda_env: meta-reference-quantized-gpu
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference-quantized
+    config:
+      model: ${env.INFERENCE_MODEL}
+      max_seq_len: 4096
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
+      quantization:
+        type: fp8
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: meta-reference-inference
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index 09fe8eabc..7671ca3cf 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -1,5 +1,12 @@
 # Ollama Distribution
 
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
 
 {{ providers_table }}
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 63432fb70..7614e4f77 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -1,4 +1,10 @@
 # Remote vLLM Distribution
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
 
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations:
diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md
index 42124696f..0938e656d 100644
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@@ -1,5 +1,12 @@
 # TGI Distribution
 
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
+
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
 
 {{ providers_table }}
diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md
index 3fc94dd35..dc150ff09 100644
--- a/llama_stack/templates/together/doc_template.md
+++ b/llama_stack/templates/together/doc_template.md
@@ -1,4 +1,11 @@
-# Fireworks Distribution
+# Together Distribution
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+self
+```
 
 The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.