fix: register provider model name and HF alias in run.yaml

Ashwin Bharambe 2025-02-27 12:03:43 -08:00
parent 4780223544
commit 9f9278f9a8
48 changed files with 597 additions and 220 deletions


@@ -27,19 +27,19 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
+- `nvidia/nv-embedqa-e5-v5 `
+- `nvidia/nv-embedqa-mistral-7b-v2 `
+- `snowflake/arctic-embed-l `
 ### Prerequisite: API Keys
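
Net effect for clients: each provider model is now registered under its provider-native name, with the HF repo id kept as an alias, so either id resolves to the same model. A minimal sketch with llama-stack-client (the base URL/port is an assumption for a locally running stack):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # port is an assumption

# Both ids below should resolve to the same NVIDIA-served model once the
# alias registration from this commit is in place.
for model_id in ["meta/llama3-8b-instruct", "meta-llama/Llama-3-8B-Instruct"]:
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Say hello."}],
    )
    print(model_id, "->", response.completion_message.content)
```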


@@ -34,9 +34,9 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 ### Prerequisite: API Keys


@@ -27,8 +27,8 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5 `
 ### Prerequisite: API Keys


@@ -37,11 +37,11 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 `
+- `groq/llama-3.1-8b-instant `
+- `groq/llama3-70b-8192 `
+- `groq/llama-3.3-70b-versatile `
+- `groq/llama-3.2-3b-preview `
 ### Prerequisite: API Keys


@@ -34,15 +34,15 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval `
+- `togethercomputer/m2-bert-80M-32k-retrieval `
 ### Prerequisite: API Keys


@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config
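
ModelRegistryHelper is what resolves an incoming model id (native name or alias) to the id the provider backend expects. Its internals are not part of this diff; a rough sketch of the presumed lookup, with attribute names assumed rather than confirmed:

```python
class ModelRegistryHelperSketch:
    """Illustrative only: index every known id, native or alias, to the
    provider-native model id the backend expects."""

    def __init__(self, model_entries) -> None:
        self.alias_to_provider_id = {}
        for entry in model_entries:
            # entry.provider_model_id / entry.aliases are assumed field names.
            for model_id in [entry.provider_model_id, *entry.aliases]:
                self.alias_to_provider_id[model_id] = entry.provider_model_id

    def get_provider_model_id(self, model_id: str) -> str:
        return self.alias_to_provider_id[model_id]
```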


@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
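
For reference, an entry built by build_hf_repo_model_entry pairs a provider-native name ("llama3.1-8b") with the canonical Llama model it serves, carrying the HF repo id as an alias. A self-contained sketch of the shape involved (class and field names are assumptions inferred from how the entries are consumed in this diff):

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class ModelEntrySketch:
    provider_model_id: str  # e.g. "llama3.1-8b", Cerebras' native name
    llama_model: str        # e.g. CoreModelId.llama3_1_8b_instruct.value
    # HF repo ids that should resolve to the same model,
    # e.g. ["meta-llama/Llama-3.1-8B-Instruct"].
    aliases: List[str] = field(default_factory=list)
    model_type: str = "llm"
    metadata: Dict[str, Any] = field(default_factory=dict)
```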


@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/llama3-8b-instruct",
        CoreModelId.llama3_8b_instruct.value,


@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")


@@ -6,12 +6,10 @@
 from pathlib import Path
 
-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
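
get_model_registry, imported above, replaces the per-template ModelInput loops. Its implementation is not shown in this diff; a minimal sketch consistent with its call sites and with the run.yaml output further down, reusing the assumed ModelEntrySketch shape from above:

```python
from typing import Dict, List

from llama_stack.distribution.datatypes import ModelInput


def get_model_registry(
    available_models: Dict[str, List[ModelEntrySketch]],
) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            # Register the provider-native name first, then each alias, all
            # pointing at the same provider_model_id.
            for model_id in [entry.provider_model_id, *entry.aliases]:
                models.append(
                    ModelInput(
                        model_id=model_id,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models
```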


@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
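
model.doc_string is new here; a plausible rendering rule consistent with the README output above, where alias-less entries get an empty suffix (hence the bare trailing space in some rendered lines). This property is an assumption, not shown in the diff:

```python
def doc_string(aliases: list[str]) -> str:
    # Empty for provider-only entries; otherwise "(aliases: ...)".
    return f"(aliases: {', '.join(aliases)})" if aliases else ""


print(doc_string(["meta-llama/Llama-3.1-8B-Instruct"]))
# (aliases: meta-llama/Llama-3.1-8B-Instruct)
print(repr(doc_string([])))
# ''
```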


@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock
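
With run.yaml carrying both registrations, listing models on a running bedrock distribution should show each Bedrock model twice, under its native name and under its HF alias, both pointing at the same provider model id. A hedged sketch (attribute names as exposed by llama-stack-client; the base URL is an assumption):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for m in client.models.list():
    # Expect pairs like:
    #   meta.llama3-1-8b-instruct-v1:0    -> meta.llama3-1-8b-instruct-v1:0
    #   meta-llama/Llama-3.1-8B-Instruct  -> meta.llama3-1-8b-instruct-v1:0
    print(f"{m.identifier:40s} -> {m.provider_resource_id}")
```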


@@ -8,14 +8,13 @@ from pathlib import Path
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras


@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
     run_configs={
         "run.yaml": RunConfigSettings(
             provider_overrides={


@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384


@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(
@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
 
 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
@@ -247,27 +297,27 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama-3.1-8b-instant
   provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3-70B-Instruct
+  model_id: groq/llama3-70b-8192
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -93,51 +93,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -91,27 +91,27 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama-3.1-8b-instant
   provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3-70B-Instruct
+  model_id: groq/llama3-70b-8192
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.3-70B-Instruct
+  model_id: groq/llama-3.3-70b-versatile
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_id: groq/llama-3.2-3b-preview
   provider_id: groq
   provider_model_id: groq/llama-3.2-3b-preview
   model_type: llm

@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use Meta Reference for running LLM inference", description="Use Meta Reference for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use Meta Reference with fp8, int4 quantization for running LLM inference", description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -20,7 +20,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }} ({{ model.provider_model_id }})` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}
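The rendered effect of this one-line template change is easy to check in isolation. A small sketch using Jinja2 directly; the example values are transcribed from the groq run.yaml above:

```python
from jinja2 import Template

bullet = Template("- `{{ model.model_id }} {{ model.doc_string }}`")
print(bullet.render(model={
    "model_id": "groq/llama-3.3-70b-versatile",
    "doc_string": "(aliases: meta-llama/Llama-3.3-70B-Instruct)",
}))
# Output: - `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
```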

@@ -6,11 +6,10 @@
from pathlib import Path from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
config=NVIDIAConfig.sample_run_config(), config=NVIDIAConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ "nvidia": MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
provider_model_id=m.provider_model_id,
provider_id="nvidia",
model_type=m.model_type,
metadata=m.metadata,
)
for m in _MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
), ),
] ]
default_models = get_model_registry(available_models)
return DistributionTemplate( return DistributionTemplate(
name="nvidia", name="nvidia",
distro_type="remote_hosted", distro_type="remote_hosted",
@@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -90,46 +90,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
models: models:
- metadata: {}
model_id: meta/llama3-8b-instruct
provider_id: nvidia
provider_model_id: meta/llama3-8b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3-8B-Instruct model_id: meta-llama/Llama-3-8B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama3-8b-instruct provider_model_id: meta/llama3-8b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama3-70b-instruct
provider_id: nvidia
provider_model_id: meta/llama3-70b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3-70B-Instruct model_id: meta-llama/Llama-3-70B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama3-70b-instruct provider_model_id: meta/llama3-70b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-8b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-8b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-8b-instruct provider_model_id: meta/llama-3.1-8b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-70b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-70b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-70b-instruct provider_model_id: meta/llama-3.1-70b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.1-405b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.1-405b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.1-405b-instruct provider_model_id: meta/llama-3.1-405b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-1b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-1b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-1b-instruct provider_model_id: meta/llama-3.2-1b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-3b-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-3b-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-3b-instruct provider_model_id: meta/llama-3.2-3b-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-11b-vision-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-11b-vision-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: nvidia provider_id: nvidia
provider_model_id: meta/llama-3.2-11b-vision-instruct provider_model_id: meta/llama-3.2-11b-vision-instruct
model_type: llm model_type: llm
- metadata: {}
model_id: meta/llama-3.2-90b-vision-instruct
provider_id: nvidia
provider_model_id: meta/llama-3.2-90b-vision-instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: nvidia provider_id: nvidia
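The run.yaml above shows the net effect of the registry change: each NVIDIA model now has two rows, one keyed by the provider-native id and one by its HF alias, both pointing at the same `provider_model_id`. A pure-Python sketch of the resulting lookup, with the dict transcribed from two of the YAML entries:

```python
# Either identifier resolves to the id actually sent to the NVIDIA endpoint.
registry = {
    "meta/llama-3.2-1b-instruct": "meta/llama-3.2-1b-instruct",
    "meta-llama/Llama-3.2-1B-Instruct": "meta/llama-3.2-1b-instruct",
}

def resolve(model_id: str) -> str:
    return registry[model_id]

assert resolve("meta-llama/Llama-3.2-1B-Instruct") == resolve("meta/llama-3.2-1b-instruct")
```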

@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
description="Use (an external) vLLM server for running LLM inference", description="Use (an external) vLLM server for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -30,7 +30,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }} ({{ model.provider_model_id }})` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}

@@ -68,46 +68,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
models: models:
- metadata: {}
model_id: Meta-Llama-3.1-8B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-8B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-8B-Instruct provider_model_id: Meta-Llama-3.1-8B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-70B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-70B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-70B-Instruct provider_model_id: Meta-Llama-3.1-70B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.1-405B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.1-405B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.1-405B-Instruct provider_model_id: Meta-Llama-3.1-405B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.2-1B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-1B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-1B-Instruct model_id: meta-llama/Llama-3.2-1B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.2-1B-Instruct provider_model_id: Meta-Llama-3.2-1B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.2-3B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.2-3B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.2-3B-Instruct provider_model_id: Meta-Llama-3.2-3B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-3.3-70B-Instruct
provider_id: sambanova
provider_model_id: Meta-Llama-3.3-70B-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Meta-Llama-3.3-70B-Instruct provider_model_id: Meta-Llama-3.3-70B-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Llama-3.2-11B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-11B-Vision-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Llama-3.2-11B-Vision-Instruct provider_model_id: Llama-3.2-11B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Llama-3.2-90B-Vision-Instruct
provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: sambanova provider_id: sambanova
provider_model_id: Llama-3.2-90B-Vision-Instruct provider_model_id: Llama-3.2-90B-Vision-Instruct
model_type: llm model_type: llm
- metadata: {}
model_id: Meta-Llama-Guard-3-8B
provider_id: sambanova
provider_model_id: Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: sambanova provider_id: sambanova

@@ -7,15 +7,13 @@
from pathlib import Path from pathlib import Path
from llama_stack.distribution.datatypes import ( from llama_stack.distribution.datatypes import (
ModelInput,
Provider, Provider,
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
config=SambaNovaImplConfig.sample_run_config(), config=SambaNovaImplConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ name: MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model], default_models = get_model_registry(available_models)
provider_model_id=m.provider_model_id,
provider_id=name,
)
for m in MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
docker_image=None, docker_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
) )
from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
models = []
for provider_id, entries in available_models.items():
for entry in entries:
ids = [entry.provider_model_id] + entry.aliases
for model_id in ids:
models.append(
ModelInput(
model_id=model_id,
provider_model_id=entry.provider_model_id,
provider_id=provider_id,
model_type=entry.model_type,
metadata=entry.metadata,
)
)
return models
class DefaultModel(BaseModel):
model_id: str
doc_string: str
class RunConfigSettings(BaseModel): class RunConfigSettings(BaseModel):
provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
default_models: Optional[List[ModelInput]] = None default_models: Optional[List[ModelInput]] = None
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
container_image: Optional[str] = None container_image: Optional[str] = None
default_models: Optional[List[ModelInput]] = None available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
def build_config(self) -> BuildConfig: def build_config(self) -> BuildConfig:
return BuildConfig( return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
autoescape=True, autoescape=True,
) )
template = env.from_string(template) template = env.from_string(template)
default_models = []
if self.available_models_by_provider:
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
for provider_id, model_entries in self.available_models_by_provider.items():
for model_entry in model_entries:
doc_parts = []
if model_entry.aliases:
doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
if has_multiple_providers:
doc_parts.append(f"provider: {provider_id}")
default_models.append(
DefaultModel(
model_id=model_entry.provider_model_id,
doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
)
)
return template.render( return template.render(
name=self.name, name=self.name,
description=self.description, description=self.description,
providers=self.providers, providers=self.providers,
providers_table=providers_table, providers_table=providers_table,
run_config_env_vars=self.run_config_env_vars, run_config_env_vars=self.run_config_env_vars,
default_models=self.default_models, default_models=default_models,
) )
def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None: def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
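A worked example of the `doc_string` this render loop builds, assuming a single-provider template (so `has_multiple_providers` is false) and an entry with one alias:

```python
# Mirrors the loop above for one entry with a single alias.
doc_parts = ["aliases: meta-llama/Llama-3.3-70B-Instruct"]
doc_string = f"({' -- '.join(doc_parts)})" if doc_parts else ""
assert doc_string == "(aliases: meta-llama/Llama-3.3-70B-Instruct)"
# With multiple providers the loop also appends "provider: <id>", giving
# "(aliases: ... -- provider: groq)"; an entry with no aliases under a single
# provider gets an empty doc_string and renders as a bare model id.
```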

@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=[inference_model, safety_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -30,7 +30,7 @@ The following environment variables can be configured:
The following models are available by default: The following models are available by default:
{% for model in default_models %} {% for model in default_models %}
- `{{ model.model_id }}` - `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %} {% endfor %}
{% endif %} {% endif %}

@@ -99,46 +99,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models: models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together provider_id: together

@@ -93,46 +93,91 @@ metadata_store:
type: sqlite type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models: models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-70B-Instruct model_id: meta-llama/Llama-3.1-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct model_id: meta-llama/Llama-3.2-3B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: together provider_id: together
provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-8B model_id: meta-llama/Llama-Guard-3-8B
provider_id: together provider_id: together
provider_model_id: meta-llama/Meta-Llama-Guard-3-8B provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
model_type: llm model_type: llm
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: together
provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
model_type: llm
- metadata: {} - metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision model_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: together provider_id: together

@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
ShieldInput, ShieldInput,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} available_models = {
default_models = [ "together": MODEL_ENTRIES,
ModelInput( }
model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, default_models = get_model_registry(available_models)
provider_model_id=m.provider_model_id,
provider_id="together",
metadata=m.metadata,
model_type=m.model_type,
)
for m in MODEL_ENTRIES
]
default_tool_groups = [ default_tool_groups = [
ToolGroupInput( ToolGroupInput(
toolgroup_id="builtin::websearch", toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=Path(__file__).parent / "doc_template.md", template_path=Path(__file__).parent / "doc_template.md",
providers=providers, providers=providers,
default_models=default_models, available_models_by_provider=available_models,
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
container_image=None, container_image=None,
template_path=None, template_path=None,
providers=providers, providers=providers,
default_models=[inference_model],
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={