From 04de2f84e96e6b448b6d2d1a826ebcb5e223d7ad Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 27 Feb 2025 16:39:23 -0800
Subject: [PATCH] fix: register provider model name and HF alias in run.yaml
 (#1304)

Each model known to the system has two identifiers:

- the `provider_resource_id` (what the provider calls it) -- e.g.,
  `accounts/fireworks/models/llama-v3p1-8b-instruct`
- the `identifier` (`model_id`) under which it is registered and gets
  routed to the appropriate provider.

So far we have used the HuggingFace repo alias as the standardized
identifier you can use to refer to the model. In the example above, the
model would be registered under the name
`meta-llama/Llama-3.1-8B-Instruct`, which makes it convenient for users
to refer to these models across providers.

However, we forgot to also register the model under its _actual_
provider model ID -- you should, of course, be able to route via the
`provider_resource_id` as well. This change fixes that (somewhat grave)
omission.

*Note*: this change is additive -- more aliases work now than before.

## Test Plan

Run the following for each `distro` in `(ollama fireworks together)`:

```
LLAMA_STACK_CONFIG=$distro \
  pytest -s -v tests/client-sdk/inference/test_text_inference.py \
  --inference-model=meta-llama/Llama-3.1-8B-Instruct --vision-inference-model=""
```
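The mechanics live in the new `get_model_registry()` helper in
`llama_stack/templates/template.py`, which the templates below now call
(its hunk falls outside this excerpt). Here is a minimal sketch of the
idea, inferred from the call sites and the regenerated run.yaml files;
the entry type and its `aliases` field are assumptions, not the verbatim
patch:

```python
# Sketch: build one ModelInput per provider model ID *plus* one per alias,
# all routing to the same provider_model_id. Field names are assumed.
from typing import Dict, List

from llama_stack.distribution.datatypes import ModelInput
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry


def get_model_registry(
    available_models: Dict[str, List[ProviderModelEntry]],
) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            # The provider's own model ID comes first; HF-repo aliases follow.
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                models.append(
                    ModelInput(
                        model_id=model_id,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models
```

This is the shape visible in the regenerated run.yaml files below: each
provider model now appears once under its provider ID and once per
HuggingFace alias.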
---
 .../remote_hosted_distro/nvidia.md | 26 +++----
 .../self_hosted_distro/bedrock.md | 6 +-
 .../self_hosted_distro/cerebras.md | 4 +-
 .../self_hosted_distro/fireworks.md | 22 +++---
 .../distributions/self_hosted_distro/groq.md | 10 +--
 .../self_hosted_distro/sambanova.md | 18 ++---
 .../self_hosted_distro/together.md | 22 +++---
 .../remote/inference/cerebras/cerebras.py | 4 +-
 .../remote/inference/cerebras/models.py | 2 +-
 .../providers/remote/inference/groq/models.py | 13 ++--
 .../remote/inference/nvidia/models.py | 2 +-
 .../remote/inference/nvidia/nvidia.py | 4 +-
 llama_stack/templates/bedrock/bedrock.py | 19 ++---
 llama_stack/templates/bedrock/doc_template.md | 2 +-
 llama_stack/templates/bedrock/run.yaml | 15 ++++
 llama_stack/templates/cerebras/cerebras.py | 20 ++----
 .../templates/cerebras/doc_template.md | 2 +-
 llama_stack/templates/cerebras/run.yaml | 10 +++
 llama_stack/templates/ci-tests/ci_tests.py | 19 ++---
 llama_stack/templates/ci-tests/run.yaml | 61 ++++++++++++++++
 llama_stack/templates/dell/dell.py | 3 -
 llama_stack/templates/dev/dev.py | 24 ++-----
 llama_stack/templates/dev/run.yaml | 72 ++++++++++++++++++-
 .../templates/fireworks/doc_template.md | 2 +-
 llama_stack/templates/fireworks/fireworks.py | 21 ++----
 .../templates/fireworks/run-with-safety.yaml | 50 +++++++++++++
 llama_stack/templates/fireworks/run.yaml | 50 +++++++++++++
 llama_stack/templates/groq/doc_template.md | 2 +-
 llama_stack/templates/groq/groq.py | 21 ++----
 llama_stack/templates/groq/run.yaml | 22 +++++-
 .../templates/hf-endpoint/hf_endpoint.py | 1 -
 .../templates/hf-serverless/hf_serverless.py | 1 -
 .../meta-reference-gpu/meta_reference.py | 1 -
 .../meta_reference.py | 1 -
 llama_stack/templates/nvidia/doc_template.md | 2 +-
 llama_stack/templates/nvidia/nvidia.py | 24 +++----
 llama_stack/templates/nvidia/run.yaml | 45 ++++++++++++
 llama_stack/templates/ollama/ollama.py | 1 -
 llama_stack/templates/remote-vllm/vllm.py | 1 -
 .../templates/sambanova/doc_template.md | 2 +-
 llama_stack/templates/sambanova/run.yaml | 45 ++++++++++++
 llama_stack/templates/sambanova/sambanova.py | 20 ++----
 llama_stack/templates/template.py | 47 +++++++++++-
 llama_stack/templates/tgi/tgi.py | 1 -
 .../templates/together/doc_template.md | 2 +-
 .../templates/together/run-with-safety.yaml | 45 ++++++++++++
 llama_stack/templates/together/run.yaml | 45 ++++++++++++
 llama_stack/templates/together/together.py | 21 ++----
 llama_stack/templates/vllm-gpu/vllm.py | 1 -
 49 files changed, 637 insertions(+), 217 deletions(-)

diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index 20a10ba4d..efa0a2d74 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -27,19 +27,19 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
+- `nvidia/nv-embedqa-e5-v5 `
+- `nvidia/nv-embedqa-mistral-7b-v2 `
+- `snowflake/arctic-embed-l `
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 14f004926..623ab6848 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -34,9 +34,9 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 6e2af14fd..8f14ae7cc 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -27,8 +27,8 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index f69e6d963..1fcd6f7af 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5 `
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 9fb7b2619..ce3f8aecc 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -37,11 +37,11 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `groq/llama-3.1-8b-instant `
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index e6ac616be..a7f738261 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -34,15 +34,15 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 7af0dcf4d..f361e93c7 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval `
+- `togethercomputer/m2-bert-80M-32k-retrieval `
 
 ### Prerequisite: API Keys
 
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 4deeea630..748c5237a 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config
 
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index a48864d49..37419bf4c 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index 4364edffa..08b9b4dc4 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -5,10 +5,13 @@
 # the root directory of this source tree.
 
 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)
 
 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index a855566bc..879855003 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index cc3bd85bb..2d93bb445 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
 
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 628e78612..18e287390 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -6,12 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
 
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md
index 357638ea5..24106525a 100644
--- a/llama_stack/templates/bedrock/doc_template.md
+++ b/llama_stack/templates/bedrock/doc_template.md
@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 7d03b7c29..00a02e0d5 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 544a50c03..bda22a498 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -8,14 +8,13 @@
 from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md
index 77fc6f478..3f5645958 100644
--- a/llama_stack/templates/cerebras/doc_template.md
+++ b/llama_stack/templates/cerebras/doc_template.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 6afff2be2..43d3158ba 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras
diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py
index a93cfff9c..979256fa1 100644
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml
index 295d72e71..3a973cabf 100644
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384
diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py
index 8348beafd..52c5a5476 100644
--- a/llama_stack/templates/dell/dell.py
+++ b/llama_stack/templates/dell/dell.py
@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py
index fe80c3842..694913119 100644
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(
@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
 
 
 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index 0ada465e4..f1d72d572 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
@@ -247,25 +297,45 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq
diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md
index 48677d571..6c7743cb8 100644
--- a/llama_stack/templates/fireworks/doc_template.md
+++ b/llama_stack/templates/fireworks/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index c78664dde..0111bc118 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
+
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index 6f622c7d9..0fe5f3026 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index e6d21d10d..cbe85c4f7 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -93,51 +93,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md
index 3f9ccbd16..85b916ccd 100644
--- a/llama_stack/templates/groq/doc_template.md
+++ b/llama_stack/templates/groq/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py
index b0c7a3804..71c504cde 100644
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
-
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index 220aa847b..78212c8d9 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -91,25 +91,45 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index 62584929c..f2849f0bc 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index af04e39d4..cea1075e2 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index 9bff981d1..3c38e0edd 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index fca15fcc5..32476f37f 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
index 9d9006a27..71b8ac32f 100644
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index 56d13a09a..cc5e96333 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -6,11 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
         config=NVIDIAConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="nvidia",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in _MODEL_ENTRIES
-    ]
+    available_models = {
+        "nvidia": MODEL_ENTRIES,
+    }
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name="nvidia",
         distro_type="remote_hosted",
@@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index bfbad749a..52e78df7b 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -90,46 +90,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 models:
+- metadata: {}
+  model_id: meta/llama3-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-405b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-1b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-3b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-11b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-90b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: nvidia
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index ba3cfe684..83c7b1a63 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 10d291456..73ee36c3f 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git
a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md index 4b18aa756..b2a295716 100644 --- a/llama_stack/templates/sambanova/doc_template.md +++ b/llama_stack/templates/sambanova/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 26815dcd0..124d11baf 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -68,46 +68,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db models: +- metadata: {} + model_id: Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-8B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-70B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova provider_model_id: Meta-Llama-3.1-405B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-1B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-3B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.3-70B-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-11B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-90B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: sambanova diff --git a/llama_stack/templates/sambanova/sambanova.py 
b/llama_stack/templates/sambanova/sambanova.py index 725c6abc4..0a0b6bd7e 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -7,15 +7,13 @@ from pathlib import Path from llama_stack.distribution.datatypes import ( - ModelInput, Provider, ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate: config=SambaNovaImplConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id=name, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + name: MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index cb5b07be3..2afb84a63 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]: + models = [] + for provider_id, entries in available_models.items(): + for entry in entries: + ids = [entry.provider_model_id] + entry.aliases + for model_id in ids: + models.append( + ModelInput( + model_id=model_id, + provider_model_id=entry.provider_model_id, + provider_id=provider_id, + model_type=entry.model_type, + metadata=entry.metadata, + ) + ) + return models + + +class DefaultModel(BaseModel): + model_id: str + doc_string: str + + class RunConfigSettings(BaseModel): provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) default_models: Optional[List[ModelInput]] = None @@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel): run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None container_image: Optional[str] = None - default_models: Optional[List[ModelInput]] = None + available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None def build_config(self) -> BuildConfig: return BuildConfig( @@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel): autoescape=True, ) template = env.from_string(template) + + default_models = [] + if self.available_models_by_provider: + 
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1 + for provider_id, model_entries in self.available_models_by_provider.items(): + for model_entry in model_entries: + doc_parts = [] + if model_entry.aliases: + doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}") + if has_multiple_providers: + doc_parts.append(f"provider: {provider_id}") + + default_models.append( + DefaultModel( + model_id=model_entry.provider_model_id, + doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "", + ) + ) + return template.render( name=self.name, description=self.description, providers=self.providers, providers_table=providers_table, run_config_env_vars=self.run_config_env_vars, - default_models=self.default_models, + default_models=default_models, ) def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None: diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 9b80414f9..eb49871a0 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md index 405d68f91..be055a43e 100644 --- a/llama_stack/templates/together/doc_template.md +++ b/llama_stack/templates/together/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }}` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 9193a3ef6..26d879802 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -99,46 +99,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: 
meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 32ddf7b16..0969cfe56 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -93,46 +93,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: 
{} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 8d0e2353c..24c395e1e 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::sentence-transformers", config=SentenceTransformersInferenceConfig.sample_run_config(), ) - - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="together", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "together": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/vllm-gpu/vllm.py 
b/llama_stack/templates/vllm-gpu/vllm.py index 8cdec589e..27a16b93d 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={
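
For reference, here is a minimal sketch (not part of the patch) of how the new `get_model_registry` helper added to `llama_stack/templates/template.py` above expands a single provider entry into multiple registered models -- one under the provider's own model ID and one per HF alias, all routed to the same provider model. The entry values are illustrative, and this assumes `ProviderModelEntry` supplies defaults for `model_type` and `metadata` when they are not passed:

```python
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

# One provider entry with an HF alias (values taken from the fireworks run.yaml above).
entry = ProviderModelEntry(
    provider_model_id="accounts/fireworks/models/llama-v3p3-70b-instruct",
    aliases=["meta-llama/Llama-3.3-70B-Instruct"],
)

# get_model_registry emits one ModelInput per accepted identifier.
for m in get_model_registry({"fireworks": [entry]}):
    print(f"{m.model_id} -> {m.provider_model_id}")

# Expected:
# accounts/fireworks/models/llama-v3p3-70b-instruct -> accounts/fireworks/models/llama-v3p3-70b-instruct
# meta-llama/Llama-3.3-70B-Instruct -> accounts/fireworks/models/llama-v3p3-70b-instruct
```

Registering a separate `ModelInput` per identifier (rather than one model with several names) keeps the routing table flat: every accepted ID resolves directly to the same provider model, which is what makes the change purely additive.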