diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 20a10ba4d..efa0a2d74 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -27,19 +27,19 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)` -- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)` -- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)` -- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)` -- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)` -- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)` -- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)` -- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)` -- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)` +- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)` +- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)` +- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `nvidia/llama-3.2-nv-embedqa-1b-v2 ` +- `nvidia/nv-embedqa-e5-v5 ` +- `nvidia/nv-embedqa-mistral-7b-v2 ` +- `snowflake/arctic-embed-l ` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 14f004926..623ab6848 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -34,9 +34,9 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)` -- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)` +- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index 6e2af14fd..8f14ae7cc 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -27,8 +27,8 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct 
(llama3.1-8b)` -- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)` +- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index f69e6d963..1fcd6f7af 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -37,17 +37,17 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)` -- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)` -- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)` -- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)` -- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)` -- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)` -- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)` +- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)` +- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` +- `nomic-ai/nomic-embed-text-v1.5 ` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md index 9fb7b2619..ce3f8aecc 100644 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ b/docs/source/distributions/self_hosted_distro/groq.md @@ -37,11 +37,11 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)` -- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)` -- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)` -- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)` -- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)` +- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `groq/llama-3.1-8b-instant ` +- 
`groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)` +- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index e6ac616be..a7f738261 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -34,15 +34,15 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)` -- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)` -- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)` -- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)` -- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)` +- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 7af0dcf4d..f361e93c7 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -37,17 +37,17 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct` -- `meta-llama/Llama-3.1-70B-Instruct` -- `meta-llama/Llama-3.1-405B-Instruct-FP8` -- `meta-llama/Llama-3.2-3B-Instruct` -- `meta-llama/Llama-3.2-11B-Vision-Instruct` -- `meta-llama/Llama-3.2-90B-Vision-Instruct` -- `meta-llama/Llama-3.3-70B-Instruct` -- `meta-llama/Llama-Guard-3-8B` -- `meta-llama/Llama-Guard-3-11B-Vision` -- `togethercomputer/m2-bert-80M-8k-retrieval` -- `togethercomputer/m2-bert-80M-32k-retrieval` +- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: 
meta-llama/Llama-3.3-70B-Instruct)` +- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` +- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)` +- `togethercomputer/m2-bert-80M-8k-retrieval ` +- `togethercomputer/m2-bert-80M-32k-retrieval ` ### Prerequisite: API Keys diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 4deeea630..748c5237a 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import CerebrasImplConfig -from .models import model_entries +from .models import MODEL_ENTRIES class CerebrasInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, config: CerebrasImplConfig) -> None: ModelRegistryHelper.__init__( self, - model_entries=model_entries, + model_entries=MODEL_ENTRIES, ) self.config = config diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py index a48864d49..37419bf4c 100644 --- a/llama_stack/providers/remote/inference/cerebras/models.py +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) -model_entries = [ +MODEL_ENTRIES = [ build_hf_repo_model_entry( "llama3.1-8b", CoreModelId.llama3_1_8b_instruct.value, diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py index 4364edffa..08b9b4dc4 100644 --- a/llama_stack/providers/remote/inference/groq/models.py +++ b/llama_stack/providers/remote/inference/groq/models.py @@ -5,10 +5,13 @@ # the root directory of this source tree. 
from llama_stack.models.llama.sku_list import CoreModelId -from llama_stack.providers.utils.inference.model_registry import build_model_entry +from llama_stack.providers.utils.inference.model_registry import ( + build_hf_repo_model_entry, + build_model_entry, +) MODEL_ENTRIES = [ - build_model_entry( + build_hf_repo_model_entry( "groq/llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), @@ -16,11 +19,11 @@ MODEL_ENTRIES = [ "groq/llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_entry( + build_hf_repo_model_entry( "groq/llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), - build_model_entry( + build_hf_repo_model_entry( "groq/llama-3.3-70b-versatile", CoreModelId.llama3_3_70b_instruct.value, ), @@ -28,7 +31,7 @@ MODEL_ENTRIES = [ # Preview models aren't recommended for production use, but we include this one # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it - build_model_entry( + build_hf_repo_model_entry( "groq/llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index a855566bc..879855003 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) -_MODEL_ENTRIES = [ +MODEL_ENTRIES = [ build_hf_repo_model_entry( "meta/llama3-8b-instruct", CoreModelId.llama3_8b_instruct.value, diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cc3bd85bb..2d93bb445 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.prompt_adapter import content_has_media from . 
import NVIDIAConfig -from .models import _MODEL_ENTRIES +from .models import MODEL_ENTRIES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -62,7 +62,7 @@ logger = logging.getLogger(__name__) class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: # TODO(mf): filter by available models - ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) + ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...") diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 628e78612..18e287390 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -6,12 +6,10 @@ from pathlib import Path -from llama_stack.apis.models import ModelInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = { + "bedrock": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="bedrock", - ) - for m in MODEL_ENTRIES - ] default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md index 357638ea5..24106525a 100644 --- a/llama_stack/templates/bedrock/doc_template.md +++ b/llama_stack/templates/bedrock/doc_template.md @@ -28,7 +28,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 7d03b7c29..00a02e0d5 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -88,16 +88,31 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db models: +- metadata: {} + model_id: meta.llama3-1-8b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-8b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-8b-instruct-v1:0 
model_type: llm +- metadata: {} + model_id: meta.llama3-1-70b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-70b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-70b-instruct-v1:0 model_type: llm +- metadata: {} + model_id: meta.llama3-1-405b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-405b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: bedrock diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 544a50c03..bda22a498 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -8,14 +8,13 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig -from llama_stack.providers.remote.inference.cerebras.models import model_entries -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate: config=SentenceTransformersInferenceConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="cerebras", - ) - for m in model_entries - ] + available_models = { + "cerebras": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md index 77fc6f478..3f5645958 100644 --- a/llama_stack/templates/cerebras/doc_template.md +++ b/llama_stack/templates/cerebras/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 6afff2be2..43d3158ba 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -90,11 +90,21 @@ metadata_store: type: sqlite db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db models: +- metadata: {} + model_id: llama3.1-8b + provider_id: cerebras + provider_model_id: llama3.1-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: cerebras provider_model_id: llama3.1-8b model_type: llm +- metadata: {} + model_id: llama-3.3-70b + provider_id: cerebras + provider_model_id: llama-3.3-70b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: cerebras diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index a93cfff9c..979256fa1 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_id="fireworks", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=default_models + [embedding_model], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 295d72e71..3a973cabf 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -90,51 +90,112 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks + provider_model_id: 
accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 model_id: nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 8348beafd..52c5a5476 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -3,7 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this 
source tree. -from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate: distro_type="self_hosted", description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container", container_image=None, - template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, embedding_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py index fe80c3842..694913119 100644 --- a/llama_stack/templates/dev/dev.py +++ b/llama_stack/templates/dev/dev.py @@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: @@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: ), ] inference_providers = [] - default_models = [] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = {} for provider_id, model_entries, config in providers: inference_providers.append( Provider( @@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: config=config, ) ) - default_models.extend( - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=provider_id, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in model_entries - ) - return inference_providers, default_models + available_models[provider_id] = model_entries + return inference_providers, available_models def get_distribution_template() -> DistributionTemplate: - inference_providers, default_models = get_inference_providers() + inference_providers, available_models = get_inference_providers() providers = { "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], @@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate: }, ) + default_models = get_model_registry(available_models) return DistributionTemplate( name=name, distro_type="self_hosted", @@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 0ada465e4..f1d72d572 100644 --- 
a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -136,51 +136,101 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: 
fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks @@ -247,25 +297,45 @@ models: provider_model_id: gemini/text-embedding-004 model_type: embedding - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md index 48677d571..6c7743cb8 100644 --- a/llama_stack/templates/fireworks/doc_template.md +++ b/llama_stack/templates/fireworks/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index c78664dde..0111bc118 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="fireworks", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in 
MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) + embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 6f622c7d9..0fe5f3026 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -99,51 +99,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index e6d21d10d..cbe85c4f7 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -93,51 +93,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md index 3f9ccbd16..85b916ccd 100644 --- a/llama_stack/templates/groq/doc_template.md +++ b/llama_stack/templates/groq/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index b0c7a3804..71c504cde 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import ( Provider, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.remote.inference.groq import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate: }, ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=name, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + "groq": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, 
+ available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 220aa847b..78212c8d9 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -91,25 +91,45 @@ metadata_store: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db models: - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 62584929c..f2849f0bc 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index af04e39d4..cea1075e2 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 9bff981d1..3c38e0edd 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index fca15fcc5..32476f37f 100644 --- 
a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference with fp8, int4 quantization for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md index 9d9006a27..71b8ac32f 100644 --- a/llama_stack/templates/nvidia/doc_template.md +++ b/llama_stack/templates/nvidia/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 56d13a09a..cc5e96333 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -6,11 +6,10 @@ from pathlib import Path -from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models +from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig -from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate: config=NVIDIAConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="nvidia", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in _MODEL_ENTRIES - ] + available_models = { + "nvidia": MODEL_ENTRIES, + } default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate: ), ] + default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", distro_type="remote_hosted", @@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index bfbad749a..52e78df7b 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -90,46 +90,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db models: +- metadata: {} + model_id: meta/llama3-8b-instruct + provider_id: nvidia + 
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index bfbad749a..52e78df7b 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -90,46 +90,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 models:
+- metadata: {}
+  model_id: meta/llama3-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-405b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-1b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-3b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-11b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-90b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: nvidia
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index ba3cfe684..83c7b1a63 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 10d291456..73ee36c3f 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
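
For the single-model templates in this diff (hf-endpoint, hf-serverless, meta-reference-*, ollama, remote-vllm, and later tgi and vllm-gpu), dropping the top-level `default_models=` argument does not drop their models: as the `RunConfigSettings` definition later in this diff shows, run configs can carry their own `default_models`. A hedged sketch of that pattern with illustrative IDs; the actual provider IDs and env-var plumbing of each template are elided here:

```python
from llama_stack.distribution.datatypes import ModelInput
from llama_stack.templates.template import RunConfigSettings

# Hypothetical single-provider template: the model is pinned per run
# config instead of being advertised via available_models_by_provider.
inference_model = ModelInput(
    model_id="${env.INFERENCE_MODEL}",
    provider_id="vllm-inference",  # illustrative provider ID
)
run_config = RunConfigSettings(default_models=[inference_model])
```
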
diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md
index 4b18aa756..b2a295716 100644
--- a/llama_stack/templates/sambanova/doc_template.md
+++ b/llama_stack/templates/sambanova/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 26815dcd0..124d11baf 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -68,46 +68,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
+- metadata: {}
+  model_id: Meta-Llama-3.1-8B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-8B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-405B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-405B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-1B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-1B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-3B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-3B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.3-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-11B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-11B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-90B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-90B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: sambanova
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 725c6abc4..0a0b6bd7e 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -7,15 +7,13 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import (
-    ModelInput,
     Provider,
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SambaNovaImplConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-        )
-        for m in MODEL_ENTRIES
-    ]
-
+    available_models = {
+        name: MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py
index cb5b07be3..2afb84a63 100644
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 
 
+def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
+    models = []
+    for provider_id, entries in available_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                models.append(
+                    ModelInput(
+                        model_id=model_id,
+                        provider_model_id=entry.provider_model_id,
+                        provider_id=provider_id,
+                        model_type=entry.model_type,
+                        metadata=entry.metadata,
+                    )
+                )
+    return models
+
+
+class DefaultModel(BaseModel):
+    model_id: str
+    doc_string: str
+
+
 class RunConfigSettings(BaseModel):
     provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
     default_models: Optional[List[ModelInput]] = None
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
     run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
     container_image: Optional[str] = None
 
-    default_models: Optional[List[ModelInput]] = None
+    available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
 
     def build_config(self) -> BuildConfig:
         return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
             autoescape=True,
         )
         template = env.from_string(template)
+
+        default_models = []
+        if self.available_models_by_provider:
+            has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
+            for provider_id, model_entries in self.available_models_by_provider.items():
+                for model_entry in model_entries:
+                    doc_parts = []
+                    if model_entry.aliases:
+                        doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
+                    if has_multiple_providers:
+                        doc_parts.append(f"provider: {provider_id}")
+
+                    default_models.append(
+                        DefaultModel(
+                            model_id=model_entry.provider_model_id,
+                            doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
+                        )
+                    )
+
         return template.render(
             name=self.name,
             description=self.description,
             providers=self.providers,
             providers_table=providers_table,
             run_config_env_vars=self.run_config_env_vars,
-            default_models=self.default_models,
+            default_models=default_models,
         )
 
     def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
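
To make the two additions to template.py concrete, here is a small sketch that exercises `get_model_registry` and reproduces the `doc_string` logic for a single-provider template. The entry values are illustrative, and it assumes `ProviderModelEntry`'s remaining fields (`model_type`, `metadata`) have usable defaults:

```python
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

# Illustrative entry; real ones come from each provider's models.py.
entry = ProviderModelEntry(
    provider_model_id="groq/llama3-8b-8192",
    aliases=["meta-llama/Llama-3.1-8B-Instruct"],
)

# One entry with one alias expands into two ModelInput registrations,
# both pointing at the same provider_model_id.
models = get_model_registry({"groq": [entry]})
assert [m.model_id for m in models] == [
    "groq/llama3-8b-8192",
    "meta-llama/Llama-3.1-8B-Instruct",
]

# The doc_string rendered into each doc_template.md has the same shape:
# "(aliases: ...)", plus "provider: ..." only when a template spans
# multiple providers.
doc_parts = [f"aliases: {', '.join(entry.aliases)}"]
print(f"({' -- '.join(doc_parts)})")  # (aliases: meta-llama/Llama-3.1-8B-Instruct)
```
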
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 9b80414f9..eb49871a0 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md
index 405d68f91..be055a43e 100644
--- a/llama_stack/templates/together/doc_template.md
+++ b/llama_stack/templates/together/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }}`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index 9193a3ef6..26d879802 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -99,46 +99,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 32ddf7b16..0969cfe56 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -93,46 +93,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index 8d0e2353c..24c395e1e 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="together",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "together": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 8cdec589e..27a16b93d 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={