From 04de2f84e96e6b448b6d2d1a826ebcb5e223d7ad Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 27 Feb 2025 16:39:23 -0800
Subject: [PATCH] fix: register provider model name and HF alias in run.yaml
 (#1304)

Each model known to the system has two identifiers:

- the `provider_resource_id` (what the provider calls it) -- e.g.,
  `accounts/fireworks/models/llama-v3p1-8b-instruct`
- the `identifier` (`model_id`) under which it is registered and gets
  routed to the appropriate provider.

So far we have used the HuggingFace repo alias as the standardized
identifier you can use to refer to the model. In the example above, the
model would be registered under the name
`meta-llama/Llama-3.1-8B-Instruct`, which makes it convenient for users
to refer to these models across providers.

However, we forgot to also register the model under its _actual_
provider model ID -- you should, of course, be able to route via the
`provider_resource_id` as well. This change fixes that (somewhat grave)
omission.

*Note*: this change is additive -- more aliases work now than before.

## Test Plan

Run the following for each `distro` in `(ollama fireworks together)`:

```
LLAMA_STACK_CONFIG=$distro \
  pytest -s -v tests/client-sdk/inference/test_text_inference.py \
  --inference-model=meta-llama/Llama-3.1-8B-Instruct --vision-inference-model=""
```
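The mechanics live in the new `get_model_registry()` helper in
`llama_stack/templates/template.py`, which the templates below now call
(its hunk falls outside this excerpt). Here is a minimal sketch of the
idea, inferred from the call sites and the regenerated run.yaml files;
the entry type and its `aliases` field are assumptions, not the verbatim
patch:

```python
# Sketch: build one ModelInput per provider model ID *plus* one per alias,
# all routing to the same provider_model_id. Field names are assumed.
from typing import Dict, List

from llama_stack.distribution.datatypes import ModelInput
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry


def get_model_registry(
    available_models: Dict[str, List[ProviderModelEntry]],
) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            # The provider's own model ID comes first; HF-repo aliases follow.
            ids = [entry.provider_model_id] + entry.aliases
            for model_id in ids:
                models.append(
                    ModelInput(
                        model_id=model_id,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models
```

This is the shape visible in the regenerated run.yaml files below: each
provider model now appears once under its provider ID and once per
HuggingFace alias.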
---
 .../remote_hosted_distro/nvidia.md | 26 +++----
 .../self_hosted_distro/bedrock.md | 6 +-
 .../self_hosted_distro/cerebras.md | 4 +-
 .../self_hosted_distro/fireworks.md | 22 +++---
 .../distributions/self_hosted_distro/groq.md | 10 +--
 .../self_hosted_distro/sambanova.md | 18 ++---
 .../self_hosted_distro/together.md | 22 +++---
 .../remote/inference/cerebras/cerebras.py | 4 +-
 .../remote/inference/cerebras/models.py | 2 +-
 .../providers/remote/inference/groq/models.py | 13 ++--
 .../remote/inference/nvidia/models.py | 2 +-
 .../remote/inference/nvidia/nvidia.py | 4 +-
 llama_stack/templates/bedrock/bedrock.py | 19 ++---
 llama_stack/templates/bedrock/doc_template.md | 2 +-
 llama_stack/templates/bedrock/run.yaml | 15 ++++
 llama_stack/templates/cerebras/cerebras.py | 20 ++----
 .../templates/cerebras/doc_template.md | 2 +-
 llama_stack/templates/cerebras/run.yaml | 10 +++
 llama_stack/templates/ci-tests/ci_tests.py | 19 ++---
 llama_stack/templates/ci-tests/run.yaml | 61 ++++++++++++++++
 llama_stack/templates/dell/dell.py | 3 -
 llama_stack/templates/dev/dev.py | 24 ++-----
 llama_stack/templates/dev/run.yaml | 72 ++++++++++++++++++-
 .../templates/fireworks/doc_template.md | 2 +-
 llama_stack/templates/fireworks/fireworks.py | 21 ++----
 .../templates/fireworks/run-with-safety.yaml | 50 +++++++++++++
 llama_stack/templates/fireworks/run.yaml | 50 +++++++++++++
 llama_stack/templates/groq/doc_template.md | 2 +-
 llama_stack/templates/groq/groq.py | 21 ++----
 llama_stack/templates/groq/run.yaml | 22 +++++-
 .../templates/hf-endpoint/hf_endpoint.py | 1 -
 .../templates/hf-serverless/hf_serverless.py | 1 -
 .../meta-reference-gpu/meta_reference.py | 1 -
 .../meta_reference.py | 1 -
 llama_stack/templates/nvidia/doc_template.md | 2 +-
 llama_stack/templates/nvidia/nvidia.py | 24 +++----
 llama_stack/templates/nvidia/run.yaml | 45 ++++++++++++
 llama_stack/templates/ollama/ollama.py | 1 -
 llama_stack/templates/remote-vllm/vllm.py | 1 -
 .../templates/sambanova/doc_template.md | 2 +-
 llama_stack/templates/sambanova/run.yaml | 45 ++++++++++++
 llama_stack/templates/sambanova/sambanova.py | 20 ++----
 llama_stack/templates/template.py | 47 +++++++++++-
 llama_stack/templates/tgi/tgi.py | 1 -
 .../templates/together/doc_template.md | 2 +-
 .../templates/together/run-with-safety.yaml | 45 ++++++++++++
 llama_stack/templates/together/run.yaml | 45 ++++++++++++
 llama_stack/templates/together/together.py | 21 ++----
 llama_stack/templates/vllm-gpu/vllm.py | 1 -
 49 files changed, 637 insertions(+), 217 deletions(-)

diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index 20a10ba4d..efa0a2d74 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -27,19 +27,19 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
+- `nvidia/nv-embedqa-e5-v5 `
+- `nvidia/nv-embedqa-mistral-7b-v2 `
+- `snowflake/arctic-embed-l `
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 14f004926..623ab6848 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -34,9 +34,9 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 6e2af14fd..8f14ae7cc 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -27,8 +27,8 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index f69e6d963..1fcd6f7af 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5 `
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 9fb7b2619..ce3f8aecc 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -37,11 +37,11 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `groq/llama-3.1-8b-instant `
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index e6ac616be..a7f738261 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -34,15 +34,15 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 
 ### Prerequisite: API Keys
 
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 7af0dcf4d..f361e93c7 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval `
+- `togethercomputer/m2-bert-80M-32k-retrieval `
 
 ### Prerequisite: API Keys
 
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 4deeea630..748c5237a 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config
 
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index a48864d49..37419bf4c 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index 4364edffa..08b9b4dc4 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -5,10 +5,13 @@
 # the root directory of this source tree.
 
 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)
 
 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index a855566bc..879855003 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index cc3bd85bb..2d93bb445 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
 
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 628e78612..18e287390 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -6,12 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
 
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md
index 357638ea5..24106525a 100644
--- a/llama_stack/templates/bedrock/doc_template.md
+++ b/llama_stack/templates/bedrock/doc_template.md
@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml
index 7d03b7c29..00a02e0d5 100644
--- a/llama_stack/templates/bedrock/run.yaml
+++ b/llama_stack/templates/bedrock/run.yaml
@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 544a50c03..bda22a498 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -8,14 +8,13 @@
 from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md
index 77fc6f478..3f5645958 100644
--- a/llama_stack/templates/cerebras/doc_template.md
+++ b/llama_stack/templates/cerebras/doc_template.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml
index 6afff2be2..43d3158ba 100644
--- a/llama_stack/templates/cerebras/run.yaml
+++ b/llama_stack/templates/cerebras/run.yaml
@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras
diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py
index a93cfff9c..979256fa1 100644
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml
index 295d72e71..3a973cabf 100644
--- a/llama_stack/templates/ci-tests/run.yaml
+++ b/llama_stack/templates/ci-tests/run.yaml
@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384
diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py
index 8348beafd..52c5a5476 100644
--- a/llama_stack/templates/dell/dell.py
+++ b/llama_stack/templates/dell/dell.py
@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path
 
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py
index fe80c3842..694913119 100644
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(
@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models
 
 
 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml
index 0ada465e4..f1d72d572 100644
--- a/llama_stack/templates/dev/run.yaml
+++ b/llama_stack/templates/dev/run.yaml
@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
@@ -247,25 +297,45 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq
diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md
index 48677d571..6c7743cb8 100644
--- a/llama_stack/templates/fireworks/doc_template.md
+++ b/llama_stack/templates/fireworks/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index c78664dde..0111bc118 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
+
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml
index 6f622c7d9..0fe5f3026 100644
--- a/llama_stack/templates/fireworks/run-with-safety.yaml
+++ b/llama_stack/templates/fireworks/run-with-safety.yaml
@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index e6d21d10d..cbe85c4f7 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -93,51 +93,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md
index 3f9ccbd16..85b916ccd 100644
--- a/llama_stack/templates/groq/doc_template.md
+++ b/llama_stack/templates/groq/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py
index b0c7a3804..71c504cde 100644
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
-
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml
index 220aa847b..78212c8d9 100644
--- a/llama_stack/templates/groq/run.yaml
+++ b/llama_stack/templates/groq/run.yaml
@@ -91,25 +91,45 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index 62584929c..f2849f0bc 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index af04e39d4..cea1075e2 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index 9bff981d1..3c38e0edd 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index fca15fcc5..32476f37f 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
index 9d9006a27..71b8ac32f 100644
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index 56d13a09a..cc5e96333 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -6,11 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
         config=NVIDIAConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="nvidia",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in _MODEL_ENTRIES
-    ]
+    available_models = {
+        "nvidia": MODEL_ENTRIES,
+    }
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name="nvidia",
         distro_type="remote_hosted",
@@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index bfbad749a..52e78df7b 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -90,46 +90,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 models:
+- metadata: {}
+  model_id: meta/llama3-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-405b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-1b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-3b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-11b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-90b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: nvidia
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index ba3cfe684..83c7b1a63 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index 10d291456..73ee36c3f 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git
a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md index 4b18aa756..b2a295716 100644 --- a/llama_stack/templates/sambanova/doc_template.md +++ b/llama_stack/templates/sambanova/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 26815dcd0..124d11baf 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -68,46 +68,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db models: +- metadata: {} + model_id: Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-8B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-70B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova provider_model_id: Meta-Llama-3.1-405B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-1B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-3B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.3-70B-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-11B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-90B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: sambanova diff --git a/llama_stack/templates/sambanova/sambanova.py 
b/llama_stack/templates/sambanova/sambanova.py index 725c6abc4..0a0b6bd7e 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -7,15 +7,13 @@ from pathlib import Path from llama_stack.distribution.datatypes import ( - ModelInput, Provider, ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate: config=SambaNovaImplConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id=name, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + name: MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index cb5b07be3..2afb84a63 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]: + models = [] + for provider_id, entries in available_models.items(): + for entry in entries: + ids = [entry.provider_model_id] + entry.aliases + for model_id in ids: + models.append( + ModelInput( + model_id=model_id, + provider_model_id=entry.provider_model_id, + provider_id=provider_id, + model_type=entry.model_type, + metadata=entry.metadata, + ) + ) + return models + + +class DefaultModel(BaseModel): + model_id: str + doc_string: str + + class RunConfigSettings(BaseModel): provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) default_models: Optional[List[ModelInput]] = None @@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel): run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None container_image: Optional[str] = None - default_models: Optional[List[ModelInput]] = None + available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None def build_config(self) -> BuildConfig: return BuildConfig( @@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel): autoescape=True, ) template = env.from_string(template) + + default_models = [] + if self.available_models_by_provider: + 
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1 + for provider_id, model_entries in self.available_models_by_provider.items(): + for model_entry in model_entries: + doc_parts = [] + if model_entry.aliases: + doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}") + if has_multiple_providers: + doc_parts.append(f"provider: {provider_id}") + + default_models.append( + DefaultModel( + model_id=model_entry.provider_model_id, + doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "", + ) + ) + return template.render( name=self.name, description=self.description, providers=self.providers, providers_table=providers_table, run_config_env_vars=self.run_config_env_vars, - default_models=self.default_models, + default_models=default_models, ) def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None: diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 9b80414f9..eb49871a0 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md index 405d68f91..be055a43e 100644 --- a/llama_stack/templates/together/doc_template.md +++ b/llama_stack/templates/together/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }}` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 9193a3ef6..26d879802 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -99,46 +99,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: 
meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 32ddf7b16..0969cfe56 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -93,46 +93,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: 
{} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 8d0e2353c..24c395e1e 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::sentence-transformers", config=SentenceTransformersInferenceConfig.sample_run_config(), ) - - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="together", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "together": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/vllm-gpu/vllm.py 
b/llama_stack/templates/vllm-gpu/vllm.py index 8cdec589e..27a16b93d 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={
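
For reference, here is a minimal sketch (not part of the patch) of how the new `get_model_registry` helper added to `llama_stack/templates/template.py` above expands a single provider entry into multiple registered models -- one under the provider's own model ID and one per HF alias, all routed to the same provider model. The entry values are illustrative, and this assumes `ProviderModelEntry` supplies defaults for `model_type` and `metadata` when they are not passed:

```python
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

# One provider entry with an HF alias (values taken from the fireworks run.yaml above).
entry = ProviderModelEntry(
    provider_model_id="accounts/fireworks/models/llama-v3p3-70b-instruct",
    aliases=["meta-llama/Llama-3.3-70B-Instruct"],
)

# get_model_registry emits one ModelInput per accepted identifier.
for m in get_model_registry({"fireworks": [entry]}):
    print(f"{m.model_id} -> {m.provider_model_id}")

# Expected:
# accounts/fireworks/models/llama-v3p3-70b-instruct -> accounts/fireworks/models/llama-v3p3-70b-instruct
# meta-llama/Llama-3.3-70B-Instruct -> accounts/fireworks/models/llama-v3p3-70b-instruct
```

Registering a separate `ModelInput` per identifier (rather than one model with several names) keeps the routing table flat: every accepted ID resolves directly to the same provider model, which is what makes the change purely additive.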