From a84e7669f0c1e13ec516b9a1277077a9b8c29464 Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Thu, 6 Feb 2025 14:14:39 -0800 Subject: [PATCH] feat: Add a new template for `dell` (#978) - Added new template `dell` and its documentation - Update docs - [minor] uv fix I came across - codegen for all templates Tested with ```bash export INFERENCE_PORT=8181 export DEH_URL=http://0.0.0.0:$INFERENCE_PORT export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct export CHROMADB_HOST=localhost export CHROMADB_PORT=6601 export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT export CUDA_VISIBLE_DEVICES=0 export LLAMA_STACK_PORT=8321 # build the stack template llama stack build --template=dell # start the TGI inference server podman run --rm -it --network host -v $HOME/.cache/huggingface:/data -e HF_TOKEN=$HF_TOKEN -p $INFERENCE_PORT:$INFERENCE_PORT --gpus $CUDA_VISIBLE_DEVICES ghcr.io/huggingface/text-generation-inference --dtype bfloat16 --usage-stats off --sharded false --cuda-memory-fraction 0.7 --model-id $INFERENCE_MODEL --port $INFERENCE_PORT --hostname 0.0.0.0 # start chroma-db for vector-io ( aka RAG ) podman run --rm -it --network host --name chromadb -v .:/chroma/chroma -e IS_PERSISTENT=TRUE chromadb/chroma:latest --port $CHROMADB_PORT --host $(hostname) # build docker llama stack build --template=dell --image-type=container # run llama stack server ( via docker ) podman run -it \ --network host \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ # NOTE: mount the llama-stack / llama-model directories if testing local changes -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \ localhost/distribution-dell:dev \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env DEH_URL=$DEH_URL \ --env CHROMA_URL=$CHROMA_URL # test the server cd LLAMA_STACK_BASE_URL=http://0.0.0.0:$LLAMA_STACK_PORT pytest -s -v tests/client-sdk/agents/test_agents.py ``` --------- Co-authored-by: Hardik Shah --- distributions/dependencies.json | 588 +++++++++--------- .../remote_hosted_distro/nvidia.md | 1 + .../self_hosted_distro/bedrock.md | 1 + .../self_hosted_distro/cerebras.md | 1 + .../distributions/self_hosted_distro/dell.md | 186 ++++++ .../self_hosted_distro/fireworks.md | 1 + .../self_hosted_distro/meta-reference-gpu.md | 3 +- .../meta-reference-quantized-gpu.md | 3 +- .../self_hosted_distro/ollama.md | 3 +- .../self_hosted_distro/remote-vllm.md | 3 +- .../self_hosted_distro/sambanova.md | 1 + .../distributions/self_hosted_distro/tgi.md | 3 +- .../self_hosted_distro/together.md | 1 + llama_stack/distribution/build_conda_env.sh | 2 +- llama_stack/distribution/build_venv.sh | 2 +- .../remote/inference/nvidia/nvidia.py | 1 + .../providers/utils/memory/vector_store.py | 6 +- llama_stack/templates/dell/__init__.py | 7 + llama_stack/templates/dell/build.yaml | 32 + llama_stack/templates/dell/dell.py | 151 +++++ llama_stack/templates/dell/doc_template.md | 174 ++++++ .../templates/dell/run-with-safety.yaml | 118 ++++ llama_stack/templates/dell/run.yaml | 109 ++++ llama_stack/templates/template.py | 5 +- 24 files changed, 1113 insertions(+), 289 deletions(-) create mode 100644 docs/source/distributions/self_hosted_distro/dell.md create mode 100644 llama_stack/templates/dell/__init__.py create mode 100644 llama_stack/templates/dell/build.yaml create mode 100644 llama_stack/templates/dell/dell.py create mode 100644
llama_stack/templates/dell/doc_template.md create mode 100644 llama_stack/templates/dell/run-with-safety.yaml create mode 100644 llama_stack/templates/dell/run.yaml diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 2b2e35a50..6babf3440 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -1,9 +1,46 @@ { - "sambanova": [ + "bedrock": [ "aiosqlite", + "autoevals", "blobfile", + "boto3", "chardet", "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "cerebras": [ + "aiosqlite", + "autoevals", + "blobfile", + "cerebras_cloud_sdk", + "chardet", + "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", @@ -27,7 +64,76 @@ "transformers", "uvicorn", "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "fireworks": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "fireworks-ai", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "hf-endpoint": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], "hf-serverless": [ "aiohttp", @@ -62,211 +168,7 @@ "transformers", "uvicorn", "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "together": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "together", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "vllm-gpu": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - 
"pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "vllm", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "remote-vllm": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "fireworks": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "fireworks-ai", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "tgi": [ - "aiohttp", - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "huggingface_hub", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "bedrock": [ - "aiosqlite", - "autoevals", - "blobfile", - "boto3", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], "meta-reference-gpu": [ "accelerate", @@ -306,39 +208,7 @@ "uvicorn", "zmq", "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "nvidia": [ - "aiosqlite", - "autoevals", - "blobfile", - "chardet", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "mcp", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], "meta-reference-quantized-gpu": [ "accelerate", @@ -380,40 +250,7 @@ "uvicorn", "zmq", "sentence-transformers --no-deps", - "torch --index-url 
https://download.pytorch.org/whl/cpu" - ], - "cerebras": [ - "aiosqlite", - "autoevals", - "blobfile", - "cerebras_cloud_sdk", - "chardet", - "chromadb-client", - "datasets", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "nltk", - "numpy", - "openai", - "opentelemetry-exporter-otlp-proto-http", - "opentelemetry-sdk", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "requests", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], "ollama": [ "aiohttp", @@ -447,9 +284,42 @@ "transformers", "uvicorn", "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ], - "hf-endpoint": [ + "remote-vllm": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "tgi": [ "aiohttp", "aiosqlite", "autoevals", @@ -482,6 +352,170 @@ "transformers", "uvicorn", "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "together": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "together", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "vllm-gpu": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "vllm", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "nvidia": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "mcp", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "sambanova": [ + "aiosqlite", + "blobfile", + "chardet", + "chromadb-client", + 
"faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" + ], + "dell": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "matplotlib", + "nltk", + "numpy", + "openai", + "opentelemetry-exporter-otlp-proto-http", + "opentelemetry-sdk", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "requests", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch torchvision --index-url https://download.pytorch.org/whl/cpu" ] } diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 61b41b1d9..f352f737e 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -1,3 +1,4 @@ + # NVIDIA Distribution The `llamastack/distribution-nvidia` distribution consists of the following provider configurations. diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index f9a9f29cd..64c9f8c19 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -1,3 +1,4 @@ + # Bedrock Distribution ```{toctree} diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index a44e6287a..a0c9eb263 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -1,3 +1,4 @@ + # Cerebras Distribution The `llamastack/distribution-cerebras` distribution consists of the following provider configurations. diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md new file mode 100644 index 000000000..be326ffa5 --- /dev/null +++ b/docs/source/distributions/self_hosted_distro/dell.md @@ -0,0 +1,186 @@ + +--- +orphan: true +--- + +# Dell Distribution of Llama Stack + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-dell` distribution consists of the following provider configurations. + +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | +| inference | `remote::tgi` | +| safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | +| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | + + +You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference. 
+ +### Environment Variables + +The following environment variables can be configured: + +- `DEH_URL`: URL for the Dell inference server (default: `http://0.0.0.0:8181`) +- `DEH_SAFETY_URL`: URL for the Dell safety inference server (default: `http://0.0.0.0:8282`) +- `CHROMA_URL`: URL for the Chroma server (default: `http://localhost:6601`) +- `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`) +- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`) + + +## Setting up Inference server using Dell Enterprise Hub's custom TGI container. + +NOTE: This is a placeholder to run inference with TGI. This will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified. + +```bash +export INFERENCE_PORT=8181 +export DEH_URL=http://0.0.0.0:$INFERENCE_PORT +export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct +export CHROMADB_HOST=localhost +export CHROMADB_PORT=6601 +export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT +export CUDA_VISIBLE_DEVICES=0 +export LLAMA_STACK_PORT=8321 + +docker run --rm -it \ + --network host \ + -v $HOME/.cache/huggingface:/data \ + -e HF_TOKEN=$HF_TOKEN \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + --gpus $CUDA_VISIBLE_DEVICES \ + ghcr.io/huggingface/text-generation-inference \ + --dtype bfloat16 \ + --usage-stats off \ + --sharded false \ + --cuda-memory-fraction 0.7 \ + --model-id $INFERENCE_MODEL \ + --port $INFERENCE_PORT --hostname 0.0.0.0 +``` + +If you are using Llama Stack Safety / Shield APIs, you will also need to run another TGI instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like: + +```bash +export SAFETY_INFERENCE_PORT=8282 +export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export CUDA_VISIBLE_DEVICES=1 + +docker run --rm -it \ + --network host \ + -v $HOME/.cache/huggingface:/data \ + -e HF_TOKEN=$HF_TOKEN \ + -p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \ + --gpus $CUDA_VISIBLE_DEVICES \ + ghcr.io/huggingface/text-generation-inference \ + --dtype bfloat16 \ + --usage-stats off \ + --sharded false \ + --cuda-memory-fraction 0.7 \ + --model-id $SAFETY_MODEL \ + --hostname 0.0.0.0 \ + --port $SAFETY_INFERENCE_PORT +``` + +## The Dell distribution relies on ChromaDB for vector database usage + +You can start a ChromaDB container easily using Podman (or Docker, with the same arguments). +```bash +# This is where the indices are persisted +mkdir -p $HOME/chromadb + +podman run --rm -it \ + --network host \ + --name chromadb \ + -v $HOME/chromadb:/chroma/chroma \ + -e IS_PERSISTENT=TRUE \ + chromadb/chroma:latest \ + --port $CHROMADB_PORT \ + --host $CHROMADB_HOST +``` + +## Running Llama Stack + +Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build the stack from code) or via Docker, which uses a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code.
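+
+The command below assumes that the TGI and ChromaDB containers from the previous sections are already running and that the corresponding environment variables are still set in your shell; if you are starting from a fresh shell, re-export them first (values as defined above), for example:
+
+```bash
+export DEH_URL=http://0.0.0.0:8181
+export CHROMA_URL=http://localhost:6601
+export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
+export LLAMA_STACK_PORT=8321
+```
+
+If you want to build the image locally instead of pulling it, `llama stack build --template dell --image-type container` produces a local image (e.g. `localhost/distribution-dell:dev`) that can be used in place of `llamastack/distribution-dell` below.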
+ +```bash +# NOTE: mount the llama-stack / llama-model directories below only if testing local changes; otherwise they are not needed +# NOTE: use localhost/distribution-dell:dev instead of llamastack/distribution-dell if building / testing locally +docker run -it \ + --network host \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v $HOME/.llama:/root/.llama \ + -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \ + llamastack/distribution-dell \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env CHROMA_URL=$CHROMA_URL + +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +# You need a local checkout of llama-stack to run this, get it using +# git clone https://github.com/meta-llama/llama-stack.git +cd /path/to/llama-stack + +export SAFETY_INFERENCE_PORT=8282 +export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v $HOME/.llama:/root/.llama \ + -v ./llama_stack/templates/dell/run-with-safety.yaml:/root/my-run.yaml \ + llamastack/distribution-dell \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env SAFETY_MODEL=$SAFETY_MODEL \ + --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ + --env CHROMA_URL=$CHROMA_URL +``` + +### Via Conda + +Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. + +```bash +llama stack build --template dell --image-type conda +llama stack run dell \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env CHROMA_URL=$CHROMA_URL +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +llama stack run ./run-with-safety.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env SAFETY_MODEL=$SAFETY_MODEL \ + --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ + --env CHROMA_URL=$CHROMA_URL +``` diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index 453cd746d..9afeb4894 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -1,3 +1,4 @@ + --- orphan: true --- diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index a371011fe..d00d8177f 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -1,3 +1,4 @@ + --- orphan: true --- @@ -82,7 +83,7 @@ docker run \ ### Via Conda -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. +Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
```bash llama stack build --template meta-reference-gpu --image-type conda diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index a32ccb65e..e46c2d112 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -1,3 +1,4 @@ + --- orphan: true --- @@ -82,7 +83,7 @@ docker run \ ### Via Conda -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. +Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. ```bash llama stack build --template meta-reference-quantized-gpu --image-type conda diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index e7c729501..54f6b8fdf 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -1,3 +1,4 @@ + --- orphan: true --- @@ -103,7 +104,7 @@ docker run \ ### Via Conda -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. +Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. ```bash export LLAMA_STACK_PORT=5001 diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index b2d28be1b..ff626d40d 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -1,3 +1,4 @@ + --- orphan: true --- @@ -131,7 +132,7 @@ docker run \ ### Via Conda -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. +Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. ```bash export INFERENCE_PORT=8000 diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index e428e085a..86ef4ac58 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -1,3 +1,4 @@ + --- orphan: true --- diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md index ba5dee77f..b970ab9fe 100644 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ b/docs/source/distributions/self_hosted_distro/tgi.md @@ -1,3 +1,4 @@ + --- orphan: true --- @@ -122,7 +123,7 @@ docker run \ ### Via Conda -Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. +Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available. 
```bash llama stack build --template tgi --image-type conda diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 2d5c8fc77..45ae462d5 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -1,3 +1,4 @@ + --- orphan: true --- diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh index ff9c26e5e..31b3e1b21 100755 --- a/llama_stack/distribution/build_conda_env.sh +++ b/llama_stack/distribution/build_conda_env.sh @@ -125,7 +125,7 @@ ensure_conda_env_python310() { fi printf "Installing from LLAMA_MODELS_DIR: $LLAMA_MODELS_DIR\n" - uv pip uninstall -y llama-models + uv pip uninstall llama-models uv pip install --no-cache-dir -e "$LLAMA_MODELS_DIR" fi diff --git a/llama_stack/distribution/build_venv.sh b/llama_stack/distribution/build_venv.sh index 3166c07f6..3cb290bb7 100755 --- a/llama_stack/distribution/build_venv.sh +++ b/llama_stack/distribution/build_venv.sh @@ -89,7 +89,7 @@ run() { fi printf "Installing from LLAMA_MODELS_DIR: $LLAMA_MODELS_DIR\n" - uv pip uninstall -y llama-models + uv pip uninstall llama-models uv pip install --no-cache-dir -e "$LLAMA_MODELS_DIR" fi diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 69533491e..b9b43006c 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -26,6 +26,7 @@ from llama_stack.apis.inference import ( Message, ResponseFormat, ToolChoice, + ToolConfig, ) from llama_stack.providers.utils.inference.model_registry import ( build_model_alias, diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index d35f3e516..310db18b0 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -17,9 +17,6 @@ import httpx import numpy as np from llama_models.llama3.api.tokenizer import Tokenizer -from numpy.typing import NDArray - -from pypdf import PdfReader from llama_stack.apis.common.content_types import ( InterleavedContent, @@ -33,6 +30,9 @@ from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) +from numpy.typing import NDArray + +from pypdf import PdfReader log = logging.getLogger(__name__) diff --git a/llama_stack/templates/dell/__init__.py b/llama_stack/templates/dell/__init__.py new file mode 100644 index 000000000..143add56e --- /dev/null +++ b/llama_stack/templates/dell/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .dell import get_distribution_template # noqa: F401 diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml new file mode 100644 index 000000000..e2edb9386 --- /dev/null +++ b/llama_stack/templates/dell/build.yaml @@ -0,0 +1,32 @@ +version: '2' +distribution_spec: + description: Dell's distribution of Llama Stack. 
TGI inference via Dell's custom + container + providers: + inference: + - remote::tgi + vector_io: + - inline::faiss + - remote::chromadb + - remote::pgvector + safety: + - inline::llama-guard + agents: + - inline::meta-reference + telemetry: + - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::rag-runtime +image_type: conda diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py new file mode 100644 index 000000000..5781da7f4 --- /dev/null +++ b/llama_stack/templates/dell/dell.py @@ -0,0 +1,151 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from pathlib import Path + +from llama_stack.apis.models.models import ModelType +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) +from llama_stack.providers.inline.inference.sentence_transformers import ( + SentenceTransformersInferenceConfig, +) + +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings + + +def get_distribution_template() -> DistributionTemplate: + providers = { + "inference": ["remote::tgi"], + "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], + "safety": ["inline::llama-guard"], + "agents": ["inline::meta-reference"], + "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::rag-runtime", + ], + } + name = "dell" + inference_provider = Provider( + provider_id="tgi0", + provider_type="remote::tgi", + config={ + "url": "${env.DEH_URL}", + }, + ) + safety_inference_provider = Provider( + provider_id="tgi1", + provider_type="remote::tgi", + config={ + "url": "${env.DEH_SAFETY_URL}", + }, + ) + embedding_provider = Provider( + provider_id="sentence-transformers", + provider_type="inline::sentence-transformers", + config=SentenceTransformersInferenceConfig.sample_run_config(), + ) + chromadb_provider = Provider( + provider_id="chromadb", + provider_type="remote::chromadb", + config={ + "url": "${env.CHROMA_URL}", + }, + ) + + inference_model = ModelInput( + model_id="${env.INFERENCE_MODEL}", + provider_id="tgi0", + ) + safety_model = ModelInput( + model_id="${env.SAFETY_MODEL}", + provider_id="tgi1", + ) + embedding_model = ModelInput( + model_id="all-MiniLM-L6-v2", + provider_id="sentence-transformers", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 384, + }, + ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="brave-search", + ), + ToolGroupInput( + toolgroup_id="builtin::rag", + provider_id="rag-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] + + return DistributionTemplate( + name=name, + distro_type="self_hosted", + description="Dell's distribution of Llama Stack. 
TGI inference via Dell's custom container", + container_image=None, + template_path=Path(__file__).parent / "doc_template.md", + providers=providers, + default_models=[inference_model, embedding_model], + run_configs={ + "run.yaml": RunConfigSettings( + provider_overrides={ + "inference": [inference_provider, embedding_provider], + "vector_io": [chromadb_provider], + }, + default_models=[inference_model, embedding_model], + default_tool_groups=default_tool_groups, + ), + "run-with-safety.yaml": RunConfigSettings( + provider_overrides={ + "inference": [ + inference_provider, + safety_inference_provider, + embedding_provider, + ], + "vector_io": [chromadb_provider], + }, + default_models=[inference_model, safety_model, embedding_model], + default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, + ), + }, + run_config_env_vars={ + "DEH_URL": ( + "http://0.0.0.0:8181", + "URL for the Dell inference server", + ), + "DEH_SAFETY_URL": ( + "http://0.0.0.0:8282", + "URL for the Dell safety inference server", + ), + "CHROMA_URL": ( + "http://localhost:6601", + "URL for the Chroma server", + ), + "INFERENCE_MODEL": ( + "meta-llama/Llama-3.2-3B-Instruct", + "Inference model loaded into the TGI server", + ), + "SAFETY_MODEL": ( + "meta-llama/Llama-Guard-3-1B", + "Name of the safety (Llama-Guard) model to use", + ), + }, + ) diff --git a/llama_stack/templates/dell/doc_template.md b/llama_stack/templates/dell/doc_template.md new file mode 100644 index 000000000..ecb7677bc --- /dev/null +++ b/llama_stack/templates/dell/doc_template.md @@ -0,0 +1,174 @@ +--- +orphan: true +--- + +# Dell Distribution of Llama Stack + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations. + +{{ providers_table }} + +You can use this distribution if you have GPUs and want to run an independent TGI or Dell Enterprise Hub container for running inference. + +{% if run_config_env_vars %} +### Environment Variables + +The following environment variables can be configured: + +{% for var, (default_value, description) in run_config_env_vars.items() %} +- `{{ var }}`: {{ description }} (default: `{{ default_value }}`) +{% endfor %} +{% endif %} + + +## Setting up Inference server using Dell Enterprise Hub's custom TGI container. + +NOTE: This is a placeholder to run inference with TGI. This will be updated to use [Dell Enterprise Hub's containers](https://dell.huggingface.co/authenticated/models) once verified. 
+ +```bash +export INFERENCE_PORT=8181 +export DEH_URL=http://0.0.0.0:$INFERENCE_PORT +export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct +export CHROMADB_HOST=localhost +export CHROMADB_PORT=6601 +export CHROMA_URL=http://$CHROMADB_HOST:$CHROMADB_PORT +export CUDA_VISIBLE_DEVICES=0 +export LLAMA_STACK_PORT=8321 + +docker run --rm -it \ + --network host \ + -v $HOME/.cache/huggingface:/data \ + -e HF_TOKEN=$HF_TOKEN \ + -p $INFERENCE_PORT:$INFERENCE_PORT \ + --gpus $CUDA_VISIBLE_DEVICES \ + ghcr.io/huggingface/text-generation-inference \ + --dtype bfloat16 \ + --usage-stats off \ + --sharded false \ + --cuda-memory-fraction 0.7 \ + --model-id $INFERENCE_MODEL \ + --port $INFERENCE_PORT --hostname 0.0.0.0 +``` + +If you are using Llama Stack Safety / Shield APIs, you will also need to run another TGI instance with a corresponding safety model such as `meta-llama/Llama-Guard-3-1B`, using a script like: + +```bash +export SAFETY_INFERENCE_PORT=8282 +export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +export CUDA_VISIBLE_DEVICES=1 + +docker run --rm -it \ + --network host \ + -v $HOME/.cache/huggingface:/data \ + -e HF_TOKEN=$HF_TOKEN \ + -p $SAFETY_INFERENCE_PORT:$SAFETY_INFERENCE_PORT \ + --gpus $CUDA_VISIBLE_DEVICES \ + ghcr.io/huggingface/text-generation-inference \ + --dtype bfloat16 \ + --usage-stats off \ + --sharded false \ + --cuda-memory-fraction 0.7 \ + --model-id $SAFETY_MODEL \ + --hostname 0.0.0.0 \ + --port $SAFETY_INFERENCE_PORT +``` + +## The Dell distribution relies on ChromaDB for vector database usage + +You can start a ChromaDB container easily using Podman (or Docker, with the same arguments). +```bash +# This is where the indices are persisted +mkdir -p $HOME/chromadb + +podman run --rm -it \ + --network host \ + --name chromadb \ + -v $HOME/chromadb:/chroma/chroma \ + -e IS_PERSISTENT=TRUE \ + chromadb/chroma:latest \ + --port $CHROMADB_PORT \ + --host $CHROMADB_HOST +``` + +## Running Llama Stack + +Now you are ready to run Llama Stack with TGI as the inference provider. You can do this via Conda (build the stack from code) or via Docker, which uses a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code.
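+
+The command below assumes that the TGI and ChromaDB containers from the previous sections are already running and that the corresponding environment variables are still set in your shell; if you are starting from a fresh shell, re-export them first (values as defined above), for example:
+
+```bash
+export DEH_URL=http://0.0.0.0:8181
+export CHROMA_URL=http://localhost:6601
+export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
+export LLAMA_STACK_PORT=8321
+```
+
+If you want to build the image locally instead of pulling it, `llama stack build --template {{ name }} --image-type container` produces a local image (e.g. `localhost/distribution-dell:dev`) that can be used in place of `llamastack/distribution-{{ name }}` below.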
+ +```bash +# NOTE: mount the llama-stack / llama-model directories below only if testing local changes; otherwise they are not needed +# NOTE: use localhost/distribution-dell:dev instead of llamastack/distribution-{{ name }} if building / testing locally +docker run -it \ + --network host \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v $HOME/.llama:/root/.llama \ + -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \ + llamastack/distribution-{{ name }} \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env CHROMA_URL=$CHROMA_URL + +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +# You need a local checkout of llama-stack to run this, get it using +# git clone https://github.com/meta-llama/llama-stack.git +cd /path/to/llama-stack + +export SAFETY_INFERENCE_PORT=8282 +export DEH_SAFETY_URL=http://0.0.0.0:$SAFETY_INFERENCE_PORT +export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v $HOME/.llama:/root/.llama \ + -v ./llama_stack/templates/{{ name }}/run-with-safety.yaml:/root/my-run.yaml \ + llamastack/distribution-{{ name }} \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env SAFETY_MODEL=$SAFETY_MODEL \ + --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ + --env CHROMA_URL=$CHROMA_URL +``` + +### Via Conda + +Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available. + +```bash +llama stack build --template {{ name }} --image-type conda +llama stack run {{ name }} \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env CHROMA_URL=$CHROMA_URL +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +llama stack run ./run-with-safety.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env DEH_URL=$DEH_URL \ + --env SAFETY_MODEL=$SAFETY_MODEL \ + --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ + --env CHROMA_URL=$CHROMA_URL +``` diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml new file mode 100644 index 000000000..bdc82d03a --- /dev/null +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -0,0 +1,118 @@ +version: '2' +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: tgi1 + provider_type: remote::tgi + config: + url: ${env.DEH_SAFETY_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMA_URL} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference +
config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: tgi1 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: brave-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml new file mode 100644 index 000000000..2ba62a782 --- /dev/null +++ b/llama_stack/templates/dell/run.yaml @@ -0,0 +1,109 @@ +version: '2' +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMA_URL} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/dell/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: 
${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: brave-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 2da55c5c9..09efd2038 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -9,7 +9,6 @@ from typing import Dict, List, Literal, Optional, Tuple import jinja2 import yaml -from pydantic import BaseModel, Field from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -25,6 +24,7 @@ from llama_stack.distribution.datatypes import ( from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from pydantic import BaseModel, Field class RunConfigSettings(BaseModel): @@ -131,7 +131,8 @@ class DistributionTemplate(BaseModel): providers_str = ", ".join(f"`{p}`" for p in providers) providers_table += f"| {api} | {providers_str} |\n" - template = self.template_path.read_text() + template = "\n" + template += self.template_path.read_text() # Render template with rich-generated table env = jinja2.Environment( trim_blocks=True,