Add eval/scoring/datasetio API providers to distribution templates & UI developer guide (#564)

# What does this PR do?

- add /eval, /scoring, /datasetio API providers to the distribution
templates
- regenerate the build.yaml / run.yaml files
- fix `template.py` to take a list of providers per API instead of only the
first one
- override the memory provider to default to faiss for all distros, since only
one memory provider is needed to start the basic flow and chromadb/pgvector
require an additional setup step (see the sketch after this list).
```
python llama_stack/scripts/distro_codegen.py
```

- update the UI README to start the UI from a conda build.
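
For the memory provider override mentioned above, here is a minimal sketch, abridged from the bedrock template in this diff (the other templates follow the same pattern):
```
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import RunConfigSettings

name = "bedrock"

# Pin faiss as the only memory provider wired into the generated run.yaml;
# chromadb/pgvector stay in build.yaml but are not started by default since
# they need an additional setup step.
memory_provider = Provider(
    provider_id="faiss",
    provider_type="inline::faiss",
    config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)

# Passed as run_configs= to DistributionTemplate(...) in the template module.
run_configs = {
    "run.yaml": RunConfigSettings(
        provider_overrides={
            "memory": [memory_provider],
        },
    ),
}
```
This pairs with the `template.py` fix, which now expands every provider type listed for an API into the run config instead of only the first one (see the `template.py` hunk further down in the diff).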

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

- Use the newly generated `run.yaml` to start the server
```
llama stack run ./llama_stack/templates/together/run.yaml
```
<img width="1191" alt="image"
src="https://github.com/user-attachments/assets/62f7d179-0cd0-427c-b6e8-e087d4648f09">


#### Registration
```
❯ llama-stack-client datasets register \
--dataset-id "mmlu" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name':    │ dataset │
│            │             │ 'evals__mmlu__details', 'split':        │         │
│            │             │ 'train'}                                │         │
└────────────┴─────────────┴─────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client datasets register \
--dataset-id "simpleqa" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__simpleqa", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                                      ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__mmlu__details',  │ dataset │
│            │             │ 'split': 'train'}                                             │         │
│ simpleqa   │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__simpleqa',       │ dataset │
│            │             │ 'split': 'train'}                                             │         │
└────────────┴─────────────┴───────────────────────────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client eval_tasks register \
> --eval-task-id meta-reference-mmlu \
> --provider-id meta-reference \
> --dataset-id mmlu \
> --scoring-functions basic::regex_parser_multiple_choice_answer
❯ llama-stack-client eval_tasks register \
--eval-task-id meta-reference-simpleqa \
--provider-id meta-reference \
--dataset-id simpleqa \
--scoring-functions llm-as-judge::405b-simpleqa
❯ llama-stack-client eval_tasks list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ dataset_id ┃ identifier       ┃ metadata ┃ provider_id    ┃ provider_resour… ┃ scoring_functio… ┃ type      ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ mmlu       │ meta-reference-… │ {}       │ meta-reference │ meta-reference-… │ ['basic::regex_… │ eval_task │
│ simpleqa   │ meta-reference-… │ {}       │ meta-reference │ meta-reference-… │ ['llm-as-judge:… │ eval_task │
└────────────┴──────────────────┴──────────┴────────────────┴──────────────────┴──────────────────┴───────────┘
```

#### Test with UI
```
streamlit run app.py
```
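
For reference, the full developer flow from the updated UI README, assuming a conda-based build (the README uses the `together` template as its example):
```
# build and start a distribution with conda
llama stack build --template together --image-type conda
llama stack run together

# install UI requirements and launch Streamlit
cd llama_stack/distribution/ui
pip install -r requirements.txt
streamlit run app.py
```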

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
Commit 7301403ce3 (parent a4daf4d3ec), authored by Xi Yan on 2024-12-05 16:29:32 -08:00, committed by GitHub.
47 changed files with 841 additions and 195 deletions

View file

@ -1,10 +1,12 @@
{
"tgi": [
"hf-serverless": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -13,6 +15,7 @@
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -27,6 +30,66 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"blobfile",
@ -54,18 +117,22 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"fireworks": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -77,82 +144,17 @@
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-quantized-gpu": [
"accelerate",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu",
"fire",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchao==0.5.0",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"fairscale",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-serverless": [
"tgi": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -161,61 +163,7 @@
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"ollama",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -232,10 +180,12 @@
],
"bedrock": [
"aiosqlite",
"autoevals",
"blobfile",
"boto3",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -243,6 +193,7 @@
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -257,20 +208,24 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-endpoint": [
"aiohttp",
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"fairscale",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -279,25 +234,34 @@
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [
"meta-reference-quantized-gpu": [
"accelerate",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu",
"fire",
"fireworks-ai",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -306,9 +270,13 @@
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchao==0.5.0",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
@ -337,5 +305,67 @@
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"ollama",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-endpoint": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
]
}

View file

@ -1,6 +1,3 @@
---
orphan: true
---
# Bedrock Distribution
```{toctree}
@ -15,9 +12,12 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::bedrock` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `remote::bedrock` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::fireworks` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `inline::meta-reference` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `inline::meta-reference-quantized` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::ollama` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
@ -119,7 +122,7 @@ llama stack run ./run-with-safety.yaml \
### (Optional) Update Model Serving Configuration
```{note}
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) variable for supported Ollama models.
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
```
To serve a new model with `ollama`

View file

@ -16,9 +16,12 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::tgi` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-together` distribution consists of the following pr
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::together` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -1,16 +1,41 @@
# LLama Stack UI
# (Experimental) LLama Stack UI
[!NOTE] This is a work in progress.
## Docker Setup
## Prerequisite
- Start up Llama Stack Server
```
llama stack run
```
:warning: This is a work in progress.
## Running Streamlit App
## Developer Setup
1. Start up Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
```
llama stack build --template together --image-type conda
llama stack run together
```
2. (Optional) Register datasets and eval tasks as resources. If you want to run pre-configured evaluation flows (e.g. Evaluations (Generation + Scoring) Page).
```bash
$ llama-stack-client datasets register \
--dataset-id "mmlu" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
```
```bash
$ llama-stack-client eval_tasks register \
--eval-task-id meta-reference-mmlu \
--provider-id meta-reference \
--dataset-id mmlu \
--scoring-functions basic::regex_parser_multiple_choice_answer
```
3. Start Streamlit UI
```bash
cd llama_stack/distribution/ui
pip install -r requirements.txt
streamlit run app.py

View file

@ -6,6 +6,9 @@
from pathlib import Path
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,10 +19,19 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["remote::bedrock"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "bedrock"
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
return DistributionTemplate(
name="bedrock",
name=name,
distro_type="self_hosted",
description="Use AWS Bedrock for running LLM inference and safety",
docker_image=None,
@ -27,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
providers=providers,
default_models=[],
run_configs={
"run.yaml": RunConfigSettings(),
"run.yaml": RunConfigSettings(
provider_overrides={
"memory": [memory_provider],
},
),
},
run_config_env_vars={
"LLAMASTACK_PORT": (

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: bedrock
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -37,6 +40,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -9,6 +9,7 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
@ -22,13 +23,23 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "fireworks"
inference_provider = Provider(
provider_id="fireworks",
provider_type="remote::fireworks",
config=FireworksImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
core_model_to_hf_repo = {
m.descriptor(): m.huggingface_repo for m in all_registered_models()
@ -42,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
]
return DistributionTemplate(
name="fireworks",
name=name,
distro_type="self_hosted",
description="Use Fireworks.AI for running LLM inference",
docker_image=None,
@ -53,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=default_models,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: fireworks
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "hf-endpoint"
inference_provider = Provider(
provider_id="hf-endpoint",
provider_type="remote::hf::endpoint",
config=InferenceEndpointImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -34,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="hf-endpoint",
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
@ -45,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -59,7 +69,8 @@ def get_distribution_template() -> DistributionTemplate:
endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
),
),
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-endpoint
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -44,6 +47,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-endpoint
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,22 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "hf-serverless"
inference_provider = Provider(
provider_id="hf-serverless",
provider_type="remote::hf::serverless",
config=InferenceAPIImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -34,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="hf-serverless",
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
@ -45,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -59,7 +70,8 @@ def get_distribution_template() -> DistributionTemplate:
repo="${env.SAFETY_MODEL}",
),
),
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-serverless
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -44,6 +47,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-serverless
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceInferenceConfig,
)
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "meta-reference-gpu"
inference_provider = Provider(
provider_id="meta-reference-inference",
provider_type="inline::meta-reference",
@ -30,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -41,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="meta-reference-gpu",
name=name,
distro_type="self_hosted",
description="Use Meta Reference for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -51,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -67,6 +77,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -46,6 +49,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -40,6 +43,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceQuantizedInferenceConfig,
)
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "meta-reference-quantized-gpu"
inference_provider = Provider(
provider_id="meta-reference-inference",
provider_type="inline::meta-reference-quantized",
@ -30,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="meta-reference-inference",
)
return DistributionTemplate(
name="meta-reference-quantized-gpu",
name=name,
distro_type="self_hosted",
description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -46,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-quantized-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -114,9 +114,9 @@ llama stack run ./run-with-safety.yaml \
### (Optional) Update Model Serving Configuration
> [!NOTE]
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
```{note}
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
```
To serve a new model with `ollama`
```bash

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -18,13 +19,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "ollama"
inference_provider = Provider(
provider_id="ollama",
provider_type="remote::ollama",
config=OllamaImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -36,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="ollama",
name=name,
distro_type="self_hosted",
description="Use (an external) Ollama server for running LLM inference",
docker_image=None,
@ -47,6 +56,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -54,7 +64,8 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [
inference_provider,
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: ollama
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: ollama
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
}
name = "remote-vllm"
inference_provider = Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
url="${env.VLLM_URL}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="remote-vllm",
name=name,
distro_type="self_hosted",
description="Use (an external) vLLM server for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -63,6 +70,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -44,36 +44,37 @@ class RunConfigSettings(BaseModel):
provider_configs[api_str] = api_providers
continue
provider_type = provider_types[0]
provider_id = provider_type.split("::")[-1]
provider_configs[api_str] = []
for provider_type in provider_types:
provider_id = provider_type.split("::")[-1]
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if hasattr(config_class, "sample_run_config"):
config = config_class.sample_run_config(
__distro_dir__=f"distributions/{name}"
)
else:
config = {}
provider_configs[api_str].append(
Provider(
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if hasattr(config_class, "sample_run_config"):
config = config_class.sample_run_config(
__distro_dir__=f"distributions/{name}"
)
else:
config = {}
provider_configs[api_str] = [
Provider(
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
]
# Get unique set of APIs from providers
apis = list(sorted(providers.keys()))

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: tgi
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: tgi
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -18,8 +19,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "tgi"
inference_provider = Provider(
provider_id="tgi-inference",
provider_type="remote::tgi",
@ -27,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
url="${env.TGI_URL}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -38,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="tgi",
name=name,
distro_type="self_hosted",
description="Use (an external) TGI server for running LLM inference",
docker_image=None,
@ -49,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -64,6 +74,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: together
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -9,6 +9,7 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
@ -22,13 +23,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "together"
inference_provider = Provider(
provider_id="together",
provider_type="remote::together",
config=TogetherImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
core_model_to_hf_repo = {
m.descriptor(): m.huggingface_repo for m in all_registered_models()
@ -42,7 +51,7 @@ def get_distribution_template() -> DistributionTemplate:
]
return DistributionTemplate(
name="together",
name=name,
distro_type="self_hosted",
description="Use Together.AI for running LLM inference",
docker_image=None,
@ -53,6 +62,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=default_models,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: vllm-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -6,6 +6,7 @@
from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.vllm import VLLMConfig
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "vllm-gpu"
inference_provider = Provider(
provider_id="vllm",
provider_type="inline::vllm",
config=VLLMConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -30,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="vllm-gpu",
name=name,
distro_type="self_hosted",
description="Use a built-in vLLM engine for running LLM inference",
docker_image=None,
@ -41,6 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),