From 7301403ce38ae3c3309199602f7cd3472a9238b8 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 5 Dec 2024 16:29:32 -0800
Subject: [PATCH] Add eval/scoring/datasetio API providers to distribution
 templates & UI developer guide (#564)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

- add /eval, /scoring, /datasetio API providers to distribution templates
- regenerate build.yaml / run.yaml files
- fix `template.py` to take in a list of providers instead of only the first one
- override the memory provider with faiss as the default for all distros (only
  one memory provider is needed to start the basic flow; chromadb/pgvector
  require an additional setup step)

```
python llama_stack/scripts/distro_codegen.py
```

- update the README to start the UI from conda builds

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

- Use the newly generated `run.yaml` to start the server

```
llama stack run ./llama_stack/templates/together/run.yaml
```

#### Registration
```
❯ llama-stack-client datasets register \
--dataset-id "mmlu" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'

❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name':    │ dataset │
│            │             │ 'evals__mmlu__details', 'split':        │         │
│            │             │ 'train'}                                │         │
└────────────┴─────────────┴─────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client datasets register \
--dataset-id "simpleqa" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__simpleqa", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'

❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                                      ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__mmlu__details', │ dataset │
│            │             │ 'split': 'train'}                                             │         │
│ simpleqa   │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__simpleqa',      │ dataset │
│            │             │ 'split': 'train'}                                             │         │
└────────────┴─────────────┴───────────────────────────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client eval_tasks register \
> --eval-task-id meta-reference-mmlu \
> --provider-id meta-reference \
> --dataset-id mmlu \
> --scoring-functions basic::regex_parser_multiple_choice_answer

❯ llama-stack-client eval_tasks register \
--eval-task-id meta-reference-simpleqa \
--provider-id meta-reference \
--dataset-id simpleqa \
--scoring-functions llm-as-judge::405b-simpleqa

❯ llama-stack-client eval_tasks list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ dataset_id ┃ identifier       ┃ metadata ┃ provider_id    ┃ provider_resour… ┃ scoring_functio… ┃ type      ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ │ mmlu │ meta-reference-… │ {} │ meta-reference │ meta-reference-… │ ['basic::regex_… │ eval_task │ │ simpleqa │ meta-reference-… │ {} │ meta-reference │ meta-reference-… │ ['llm-as-judge:… │ eval_task │ └────────────┴──────────────────┴──────────┴────────────────┴──────────────────┴──────────────────┴───────────┘ ``` #### Test with UI ``` streamlit run app.py ``` ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Ran pre-commit to handle lint / formatting issues. - [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests. --- distributions/dependencies.json | 290 ++++++++++-------- .../self_hosted_distro/bedrock.md | 6 +- .../self_hosted_distro/fireworks.md | 3 + .../self_hosted_distro/meta-reference-gpu.md | 3 + .../meta-reference-quantized-gpu.md | 3 + .../self_hosted_distro/ollama.md | 5 +- .../distributions/self_hosted_distro/tgi.md | 3 + .../self_hosted_distro/together.md | 3 + llama_stack/distribution/ui/README.md | 41 ++- llama_stack/templates/bedrock/bedrock.py | 20 +- llama_stack/templates/bedrock/build.yaml | 9 + llama_stack/templates/bedrock/run.yaml | 24 ++ llama_stack/templates/fireworks/build.yaml | 9 + llama_stack/templates/fireworks/fireworks.py | 14 +- llama_stack/templates/fireworks/run.yaml | 24 ++ llama_stack/templates/hf-endpoint/build.yaml | 9 + .../templates/hf-endpoint/hf_endpoint.py | 17 +- .../hf-endpoint/run-with-safety.yaml | 24 ++ llama_stack/templates/hf-endpoint/run.yaml | 24 ++ .../templates/hf-serverless/build.yaml | 9 + .../templates/hf-serverless/hf_serverless.py | 16 +- .../hf-serverless/run-with-safety.yaml | 24 ++ llama_stack/templates/hf-serverless/run.yaml | 24 ++ .../templates/meta-reference-gpu/build.yaml | 9 + .../meta-reference-gpu/meta_reference.py | 15 +- .../meta-reference-gpu/run-with-safety.yaml | 24 ++ .../templates/meta-reference-gpu/run.yaml | 24 ++ .../meta-reference-quantized-gpu/build.yaml | 9 + .../meta_reference.py | 14 +- .../meta-reference-quantized-gpu/run.yaml | 24 ++ llama_stack/templates/ollama/build.yaml | 9 + llama_stack/templates/ollama/doc_template.md | 6 +- llama_stack/templates/ollama/ollama.py | 17 +- .../templates/ollama/run-with-safety.yaml | 24 ++ llama_stack/templates/ollama/run.yaml | 24 ++ llama_stack/templates/remote-vllm/vllm.py | 12 +- llama_stack/templates/template.py | 55 ++-- llama_stack/templates/tgi/build.yaml | 9 + .../templates/tgi/run-with-safety.yaml | 24 ++ llama_stack/templates/tgi/run.yaml | 24 ++ llama_stack/templates/tgi/tgi.py | 15 +- llama_stack/templates/together/build.yaml | 9 + llama_stack/templates/together/run.yaml | 24 ++ llama_stack/templates/together/together.py | 14 +- llama_stack/templates/vllm-gpu/build.yaml | 9 + llama_stack/templates/vllm-gpu/run.yaml | 24 ++ llama_stack/templates/vllm-gpu/vllm.py | 14 +- 47 files changed, 841 insertions(+), 195 deletions(-) diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 80468cc73..4e66a85da 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -1,10 +1,12 @@ { - "tgi": [ + "hf-serverless": [ "aiohttp", "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", @@ -13,6 +15,7 @@ 
"matplotlib", "nltk", "numpy", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -27,6 +30,66 @@ "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], + "together": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "nltk", + "numpy", + "openai", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "scikit-learn", + "scipy", + "sentencepiece", + "together", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch --index-url https://download.pytorch.org/whl/cpu" + ], + "vllm-gpu": [ + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "nltk", + "numpy", + "openai", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "vllm", + "sentence-transformers --no-deps", + "torch --index-url https://download.pytorch.org/whl/cpu" + ], "remote-vllm": [ "aiosqlite", "blobfile", @@ -54,18 +117,22 @@ "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], - "vllm-gpu": [ + "fireworks": [ "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", + "fireworks-ai", "httpx", "matplotlib", "nltk", "numpy", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -77,82 +144,17 @@ "tqdm", "transformers", "uvicorn", - "vllm", "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], - "meta-reference-quantized-gpu": [ - "accelerate", - "aiosqlite", - "blobfile", - "chardet", - "chromadb-client", - "fairscale", - "faiss-cpu", - "fastapi", - "fbgemm-gpu", - "fire", - "httpx", - "lm-format-enforcer", - "matplotlib", - "nltk", - "numpy", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "scikit-learn", - "scipy", - "sentencepiece", - "torch", - "torchao==0.5.0", - "torchvision", - "tqdm", - "transformers", - "uvicorn", - "zmq", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "meta-reference-gpu": [ - "accelerate", - "aiosqlite", - "blobfile", - "chardet", - "chromadb-client", - "fairscale", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "lm-format-enforcer", - "matplotlib", - "nltk", - "numpy", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "scikit-learn", - "scipy", - "sentencepiece", - "torch", - "torchvision", - "tqdm", - "transformers", - "uvicorn", - "zmq", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "hf-serverless": [ + "tgi": [ "aiohttp", "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", @@ -161,61 +163,7 @@ "matplotlib", "nltk", "numpy", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "scikit-learn", - "scipy", - "sentencepiece", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "together": [ - "aiosqlite", - "blobfile", - "chardet", - "chromadb-client", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "nltk", - "numpy", - "pandas", - "pillow", - "psycopg2-binary", - "pypdf", - "redis", - "scikit-learn", - "scipy", - 
"sentencepiece", - "together", - "tqdm", - "transformers", - "uvicorn", - "sentence-transformers --no-deps", - "torch --index-url https://download.pytorch.org/whl/cpu" - ], - "ollama": [ - "aiohttp", - "aiosqlite", - "blobfile", - "chardet", - "chromadb-client", - "faiss-cpu", - "fastapi", - "fire", - "httpx", - "matplotlib", - "nltk", - "numpy", - "ollama", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -232,10 +180,12 @@ ], "bedrock": [ "aiosqlite", + "autoevals", "blobfile", "boto3", "chardet", "chromadb-client", + "datasets", "faiss-cpu", "fastapi", "fire", @@ -243,6 +193,7 @@ "matplotlib", "nltk", "numpy", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -257,20 +208,24 @@ "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], - "hf-endpoint": [ - "aiohttp", + "meta-reference-gpu": [ + "accelerate", "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", + "fairscale", "faiss-cpu", "fastapi", "fire", "httpx", - "huggingface_hub", + "lm-format-enforcer", "matplotlib", "nltk", "numpy", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -279,25 +234,34 @@ "scikit-learn", "scipy", "sentencepiece", + "torch", + "torchvision", "tqdm", "transformers", "uvicorn", + "zmq", "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], - "fireworks": [ + "meta-reference-quantized-gpu": [ + "accelerate", "aiosqlite", + "autoevals", "blobfile", "chardet", "chromadb-client", + "datasets", + "fairscale", "faiss-cpu", "fastapi", + "fbgemm-gpu", "fire", - "fireworks-ai", "httpx", + "lm-format-enforcer", "matplotlib", "nltk", "numpy", + "openai", "pandas", "pillow", "psycopg2-binary", @@ -306,9 +270,13 @@ "scikit-learn", "scipy", "sentencepiece", + "torch", + "torchao==0.5.0", + "torchvision", "tqdm", "transformers", "uvicorn", + "zmq", "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" ], @@ -337,5 +305,67 @@ "uvicorn", "sentence-transformers --no-deps", "torch --index-url https://download.pytorch.org/whl/cpu" + ], + "ollama": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "matplotlib", + "nltk", + "numpy", + "ollama", + "openai", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch --index-url https://download.pytorch.org/whl/cpu" + ], + "hf-endpoint": [ + "aiohttp", + "aiosqlite", + "autoevals", + "blobfile", + "chardet", + "chromadb-client", + "datasets", + "faiss-cpu", + "fastapi", + "fire", + "httpx", + "huggingface_hub", + "matplotlib", + "nltk", + "numpy", + "openai", + "pandas", + "pillow", + "psycopg2-binary", + "pypdf", + "redis", + "scikit-learn", + "scipy", + "sentencepiece", + "tqdm", + "transformers", + "uvicorn", + "sentence-transformers --no-deps", + "torch --index-url https://download.pytorch.org/whl/cpu" ] } diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index e0a5d80d0..ae03c89da 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -1,6 +1,3 @@ ---- -orphan: true ---- # Bedrock Distribution ```{toctree} @@ -15,9 +12,12 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro | API | 
Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | | inference | `remote::bedrock` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `remote::bedrock` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index e54302c2e..06a12cb1d 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -15,9 +15,12 @@ The `llamastack/distribution-fireworks` distribution consists of the following p | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | | inference | `remote::fireworks` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index f9717894f..73d6befd4 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | | inference | `inline::meta-reference` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 3ca161d07..fab9c6cd8 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | | inference | `inline::meta-reference-quantized` | | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index 9f81d9329..c915a7ac3 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -15,9 +15,12 @@ The `llamastack/distribution-ollama` distribution consists of the following prov | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, 
`inline::localfs` |
+| eval | `inline::meta-reference` |
 | inference | `remote::ollama` |
 | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 
 
@@ -119,7 +122,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 
 ```{note}
-Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) variable for supported Ollama models.
+Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
 ```
 
 To serve a new model with `ollama`
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 59485226e..84b91da38 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -16,9 +16,12 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 | API | Provider(s) |
 |-----|-------------|
 | agents | `inline::meta-reference` |
+| datasetio | `remote::huggingface`, `inline::localfs` |
+| eval | `inline::meta-reference` |
 | inference | `remote::tgi` |
 | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 
 
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 5cfc9e805..c458fdb5f 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -15,9 +15,12 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | API | Provider(s) |
 |-----|-------------|
 | agents | `inline::meta-reference` |
+| datasetio | `remote::huggingface`, `inline::localfs` |
+| eval | `inline::meta-reference` |
 | inference | `remote::together` |
 | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
+| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 
 
diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md
index 2cc352c52..c0a2597af 100644
--- a/llama_stack/distribution/ui/README.md
+++ b/llama_stack/distribution/ui/README.md
@@ -1,16 +1,41 @@
-# LLama Stack UI
+# (Experimental) Llama Stack UI
 
-[!NOTE] This is a work in progress.
+## Docker Setup
 
-## Prerequisite
-- Start up Llama Stack Server
-```
-llama stack run
-```
+:warning: This is a work in progress.
 
-## Running Streamlit App
+## Developer Setup
+
+1. Start up Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
 
 ```
+llama stack build --template together --image-type conda
+
+llama stack run together
+```
+
+2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).
+ +```bash +$ llama-stack-client datasets register \ +--dataset-id "mmlu" \ +--provider-id "huggingface" \ +--url "https://huggingface.co/datasets/llamastack/evals" \ +--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ +--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}' +``` + +```bash +$ llama-stack-client eval_tasks register \ +--eval-task-id meta-reference-mmlu \ +--provider-id meta-reference \ +--dataset-id mmlu \ +--scoring-functions basic::regex_parser_multiple_choice_answer +``` + +3. Start Streamlit UI + +```bash cd llama_stack/distribution/ui pip install -r requirements.txt streamlit run app.py diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index cf3c342fe..c52b56612 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -6,6 +6,9 @@ from pathlib import Path +from llama_stack.distribution.datatypes import Provider + +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -16,10 +19,19 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["remote::bedrock"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } + name = "bedrock" + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) return DistributionTemplate( - name="bedrock", + name=name, distro_type="self_hosted", description="Use AWS Bedrock for running LLM inference and safety", docker_image=None, @@ -27,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate: providers=providers, default_models=[], run_configs={ - "run.yaml": RunConfigSettings(), + "run.yaml": RunConfigSettings( + provider_overrides={ + "memory": [memory_provider], + }, + ), }, run_config_env_vars={ "LLAMASTACK_PORT": ( diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index c73db3eae..cd36c320e 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 1f632a1f2..77d4f2248 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: bedrock apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -37,6 +40,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: 
inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index c16e3f5d6..30ea347ae 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 5f744cae0..64387e4b7 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -9,6 +9,7 @@ from pathlib import Path from llama_models.sku_list import all_registered_models from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES @@ -22,13 +23,23 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } + name = "fireworks" + inference_provider = Provider( provider_id="fireworks", provider_type="remote::fireworks", config=FireworksImplConfig.sample_run_config(), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) core_model_to_hf_repo = { m.descriptor(): m.huggingface_repo for m in all_registered_models() @@ -42,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate: ] return DistributionTemplate( - name="fireworks", + name=name, distro_type="self_hosted", description="Use Fireworks.AI for running LLM inference", docker_image=None, @@ -53,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=default_models, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 6add39c3a..9296be28f 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: fireworks apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -39,6 +42,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: 
inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index 798cb3961..523cf5d83 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index af00114ba..297fdae51 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -5,6 +5,7 @@ # the root directory of this source tree. from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } - + name = "hf-endpoint" inference_provider = Provider( provider_id="hf-endpoint", provider_type="remote::hf::endpoint", config=InferenceEndpointImplConfig.sample_run_config(), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -34,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate: ) return DistributionTemplate( - name="hf-endpoint", + name=name, distro_type="self_hosted", description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", docker_image=None, @@ -45,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), @@ -59,7 +69,8 @@ def get_distribution_template() -> DistributionTemplate: endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}", ), ), - ] + ], + "memory": [memory_provider], }, default_models=[ inference_model, diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index d518f29b8..bd625ffc5 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: hf-endpoint apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -44,6 +47,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + 
datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index ff4e90606..bf0697bba 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: hf-endpoint apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -39,6 +42,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index 3c03a98c1..af7eb60fe 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index 5434de986..835495bb9 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -16,13 +17,22 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } + name = "hf-serverless" inference_provider = Provider( provider_id="hf-serverless", provider_type="remote::hf::serverless", config=InferenceAPIImplConfig.sample_run_config(), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -34,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate: ) return DistributionTemplate( - name="hf-serverless", + name=name, distro_type="self_hosted", description="Use (an external) Hugging Face Inference Endpoint for running LLM inference", docker_image=None, @@ -45,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), @@ -59,7 +70,8 @@ def get_distribution_template() -> DistributionTemplate: repo="${env.SAFETY_MODEL}", ), ), - ] + ], + "memory": [memory_provider], }, default_models=[ inference_model, diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index e7591bbf0..f5ead14d4 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: hf-serverless apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -44,6 +47,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d7ec02f6a..13e2d7789 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: hf-serverless apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -39,6 +42,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + 
provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index ef075d098..300b75b14 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index f254bc920..0aff9f39c 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceInferenceConfig, ) +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } - + name = "meta-reference-gpu" inference_provider = Provider( provider_id="meta-reference-inference", provider_type="inline::meta-reference", @@ -30,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate: checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}", ), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -41,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate: ) return DistributionTemplate( - name="meta-reference-gpu", + name=name, distro_type="self_hosted", description="Use Meta Reference for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", @@ -51,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), @@ -67,6 +77,7 @@ def get_distribution_template() -> DistributionTemplate: ), ), ], + "memory": [memory_provider], }, default_models=[ inference_model, diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index f82e0c938..d0fa05e96 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: meta-reference-gpu apis: - agents +- 
datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -46,6 +49,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index b125169a3..3675f4a58 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: meta-reference-gpu apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -40,6 +43,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index 961864dac..9d866de18 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index 1ff5d31d6..1d611ae5f 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceQuantizedInferenceConfig, ) +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } - + name = "meta-reference-quantized-gpu" inference_provider = Provider( 
provider_id="meta-reference-inference", provider_type="inline::meta-reference-quantized", @@ -30,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate: checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}", ), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", provider_id="meta-reference-inference", ) return DistributionTemplate( - name="meta-reference-quantized-gpu", + name=name, distro_type="self_hosted", description="Use Meta Reference with fp8, int4 quantization for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", @@ -46,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index e1104b623..081af0f59 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: meta-reference-quantized-gpu apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -42,6 +45,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 106449309..a021e4993 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md index cfefce33d..a75583592 100644 --- a/llama_stack/templates/ollama/doc_template.md +++ b/llama_stack/templates/ollama/doc_template.md @@ -114,9 +114,9 @@ llama stack run ./run-with-safety.yaml \ ### (Optional) Update Model Serving Configuration -> [!NOTE] -> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models. - +```{note} +Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models. 
+``` To serve a new model with `ollama` ```bash diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index b30c75bb5..c24dfa6e9 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -18,13 +19,21 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "eval": ["inline::meta-reference"], + "datasetio": ["remote::huggingface", "inline::localfs"], + "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], } - + name = "ollama" inference_provider = Provider( provider_id="ollama", provider_type="remote::ollama", config=OllamaImplConfig.sample_run_config(), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -36,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate: ) return DistributionTemplate( - name="ollama", + name=name, distro_type="self_hosted", description="Use (an external) Ollama server for running LLM inference", docker_image=None, @@ -47,6 +56,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), @@ -54,7 +64,8 @@ def get_distribution_template() -> DistributionTemplate: provider_overrides={ "inference": [ inference_provider, - ] + ], + "memory": [memory_provider], }, default_models=[ inference_model, diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 6c86677b3..dc282f996 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: ollama apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -38,6 +41,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index b2d6f2c18..ab8e12839 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: ollama apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -38,6 +41,27 @@ providers: - 
provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index c3858f7e5..f5ccfcf16 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate: "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], } - + name = "remote-vllm" inference_provider = Provider( provider_id="vllm-inference", provider_type="remote::vllm", @@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate: url="${env.VLLM_URL}", ), ) + memory_provider = Provider( + provider_id="faiss", + provider_type="inline::faiss", + config=FaissImplConfig.sample_run_config(f"distributions/{name}"), + ) inference_model = ModelInput( model_id="${env.INFERENCE_MODEL}", @@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate: ) return DistributionTemplate( - name="remote-vllm", + name=name, distro_type="self_hosted", description="Use (an external) vLLM server for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", @@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate: "run.yaml": RunConfigSettings( provider_overrides={ "inference": [inference_provider], + "memory": [memory_provider], }, default_models=[inference_model], ), @@ -63,6 +70,7 @@ def get_distribution_template() -> DistributionTemplate: ), ), ], + "memory": [memory_provider], }, default_models=[ inference_model, diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index bf74b95d1..e82be6394 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -44,36 +44,37 @@ class RunConfigSettings(BaseModel): provider_configs[api_str] = api_providers continue - provider_type = provider_types[0] - provider_id = provider_type.split("::")[-1] + provider_configs[api_str] = [] + for provider_type in provider_types: + provider_id = provider_type.split("::")[-1] - api = Api(api_str) - if provider_type not in provider_registry[api]: - raise ValueError( - f"Unknown provider type: {provider_type} for API: {api_str}" + api = Api(api_str) + if provider_type not in provider_registry[api]: + raise ValueError( + f"Unknown provider type: {provider_type} for API: {api_str}" + ) + + config_class = provider_registry[api][provider_type].config_class + assert ( + config_class is not None + ), f"No config class for provider type: {provider_type} for API: {api_str}" + + config_class = instantiate_class_type(config_class) + if 
hasattr(config_class, "sample_run_config"): + config = config_class.sample_run_config( + __distro_dir__=f"distributions/{name}" + ) + else: + config = {} + + provider_configs[api_str].append( + Provider( + provider_id=provider_id, + provider_type=provider_type, + config=config, + ) ) - config_class = provider_registry[api][provider_type].config_class - assert ( - config_class is not None - ), f"No config class for provider type: {provider_type} for API: {api_str}" - - config_class = instantiate_class_type(config_class) - if hasattr(config_class, "sample_run_config"): - config = config_class.sample_run_config( - __distro_dir__=f"distributions/{name}" - ) - else: - config = {} - - provider_configs[api_str] = [ - Provider( - provider_id=provider_id, - provider_type=provider_type, - config=config, - ) - ] - # Get unique set of APIs from providers apis = list(sorted(providers.keys())) diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 0f7602e2f..d90b505df 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -16,4 +16,13 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + eval: + - inline::meta-reference + datasetio: + - remote::huggingface + - inline::localfs + scoring: + - inline::basic + - inline::llm-as-judge + - inline::braintrust image_type: conda diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index ebf082cd6..2ee82ddc3 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: tgi apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -42,6 +45,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 352afabb5..c45e114ee 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -4,9 +4,12 @@ docker_image: null conda_env: tgi apis: - agents +- datasetio +- eval - inference - memory - safety +- scoring - telemetry providers: inference: @@ -38,6 +41,27 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: {} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: {} + - provider_id: localfs + provider_type: inline::localfs + config: {} + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: {} metadata_store: namespace: null type: sqlite diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 
index caa341df3..83818a598 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
@@ -18,8 +19,11 @@ def get_distribution_template() -> DistributionTemplate:
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -27,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -49,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
@@ -64,6 +74,7 @@ def get_distribution_template() -> DistributionTemplate:
                         ),
                     ),
                 ],
+                "memory": [memory_provider],
             },
             default_models=[
                 inference_model,
diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml
index a4402ba93..6930b7692 100644
--- a/llama_stack/templates/together/build.yaml
+++ b/llama_stack/templates/together/build.yaml
@@ -16,4 +16,13 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
 image_type: conda
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 855ba0626..a9f96a099 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -4,9 +4,12 @@ docker_image: null
 conda_env: together
 apis:
 - agents
+- datasetio
+- eval
 - inference
 - memory
 - safety
+- scoring
 - telemetry
 providers:
   inference:
@@ -39,6 +42,27 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config: {}
 metadata_store:
   namespace: null
   type: sqlite
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index 16265b04f..6656cfe44 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
 
@@ -22,13 +23,21 @@ def get_distribution_template() -> DistributionTemplate:
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "together"
     inference_provider = Provider(
         provider_id="together",
         provider_type="remote::together",
         config=TogetherImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -42,7 +51,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="together",
+        name=name,
         distro_type="self_hosted",
         description="Use Together.AI for running LLM inference",
         docker_image=None,
@@ -53,6 +62,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml
index 6792a855f..4289296ec 100644
--- a/llama_stack/templates/vllm-gpu/build.yaml
+++ b/llama_stack/templates/vllm-gpu/build.yaml
@@ -16,4 +16,13 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    eval:
+    - inline::meta-reference
+    datasetio:
+    - remote::huggingface
+    - inline::localfs
+    scoring:
+    - inline::basic
+    - inline::llm-as-judge
+    - inline::braintrust
 image_type: conda
diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml
index a140ad403..ea188777f 100644
--- a/llama_stack/templates/vllm-gpu/run.yaml
+++ b/llama_stack/templates/vllm-gpu/run.yaml
@@ -4,9 +4,12 @@ docker_image: null
 conda_env: vllm-gpu
 apis:
 - agents
+- datasetio
+- eval
 - inference
 - memory
 - safety
+- scoring
 - telemetry
 providers:
   inference:
@@ -42,6 +45,27 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config: {}
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config: {}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config: {}
 metadata_store:
   namespace: null
   type: sqlite
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 78fcf4f57..10b448b5c 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -6,6 +6,7 @@
 
 from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.vllm import VLLMConfig
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "eval": ["inline::meta-reference"],
+        "datasetio": ["remote::huggingface", "inline::localfs"],
+        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "vllm-gpu"
     inference_provider = Provider(
         provider_id="vllm",
         provider_type="inline::vllm",
         config=VLLMConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -30,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="vllm-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use a built-in vLLM engine for running LLM inference",
         docker_image=None,
@@ -41,6 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),