From 07ccf908f728043530fa4aa2d3d1fd7e17c3c6b1 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 20 Feb 2025 14:02:36 -0800
Subject: [PATCH] ModelAlias -> ProviderModelEntry

---
 .../self_hosted_distro/ollama.md               |  2 +-
 .../inference/meta_reference/inference.py      |  4 +-
 .../remote/inference/bedrock/bedrock.py        |  4 +-
 .../remote/inference/bedrock/models.py         | 10 ++---
 .../remote/inference/cerebras/cerebras.py      |  4 +-
 .../remote/inference/cerebras/models.py        |  8 ++--
 .../remote/inference/databricks/databricks.py  | 10 ++---
 .../remote/inference/fireworks/fireworks.py    |  4 +-
 .../remote/inference/fireworks/models.py       | 24 +++++------
 .../providers/remote/inference/groq/groq.py    | 18 ++++----
 .../remote/inference/nvidia/models.py          | 22 +++++-----
 .../remote/inference/nvidia/nvidia.py          |  4 +-
 .../remote/inference/ollama/ollama.py          | 42 +++++++++----------
 .../remote/inference/sambanova/models.py       | 22 +++++-----
 .../remote/inference/sambanova/sambanova.py    |  4 +-
 .../providers/remote/inference/tgi/tgi.py      |  8 ++--
 .../remote/inference/together/models.py        | 22 +++++-----
 .../remote/inference/together/together.py      |  4 +-
 .../providers/remote/inference/vllm/vllm.py    |  8 ++--
 .../utils/inference/model_registry.py          | 14 +++----
 llama_stack/templates/bedrock/bedrock.py       |  4 +-
 llama_stack/templates/cerebras/cerebras.py     |  4 +-
 llama_stack/templates/fireworks/fireworks.py   |  4 +-
 llama_stack/templates/nvidia/nvidia.py         |  4 +-
 llama_stack/templates/ollama/doc_template.md   |  2 +-
 llama_stack/templates/sambanova/sambanova.py   |  4 +-
 llama_stack/templates/together/together.py     |  4 +-
 27 files changed, 132 insertions(+), 132 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index a3a45f9a8..2fa796e81 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -130,7 +130,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 
 ```{note}
-Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
 ```
 
 To serve a new model with `ollama`
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index dfd27d408..763d9664d 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
 )
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     augment_content_with_response_format_prompt,
@@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
 
         self.model_registry_helper = ModelRegistryHelper(
             [
-                build_hf_repo_model_alias(
+                build_hf_repo_model_entry(
                     llama_model.descriptor(),
                     llama_model.core_model_id.value,
                 )
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 610707f3f..9c5a291db 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -43,12 +43,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
 
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: BedrockConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self._config = config
 
         self._client = create_bedrock_client(config)
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py
index 4c5248619..c5079799f 100644
--- a/llama_stack/providers/remote/inference/bedrock/models.py
+++ b/llama_stack/providers/remote/inference/bedrock/models.py
@@ -6,19 +6,19 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta.llama3-1-8b-instruct-v1:0",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta.llama3-1-70b-instruct-v1:0",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta.llama3-1-405b-instruct-v1:0",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index e7b77a6e9..0a27d81d7 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -41,14 +41,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-from .models import model_aliases
+from .models import model_entries
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_aliases=model_aliases,
+            model_entries=model_entries,
         )
         self.config = config
 
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index 53b0d5b55..a48864d49 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -6,15 +6,15 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.3-70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index 03da4d129..de13638f5 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -38,12 +38,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 from .config import DatabricksImplConfig
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "databricks-meta-llama-3-1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "databricks-meta-llama-3-1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
@@ -52,7 +52,7 @@ model_aliases = [
 
 class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: DatabricksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_aliases=model_aliases)
+        ModelRegistryHelper.__init__(self, model_entries=model_entries)
         self.config = config
 
     async def initialize(self) -> None:
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index 4f8d167f1..3f2ee91e0 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -47,12 +47,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import FireworksImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
     def __init__(self, config: FireworksImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py
index 8ba67c9ff..b44f89853 100644
--- a/llama_stack/providers/remote/inference/fireworks/models.py
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -6,47 +6,47 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p3-70b-instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-guard-3-8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index 12ee613fe..c75e92dfe 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
-    build_model_alias,
+    build_hf_repo_model_entry,
+    build_model_entry,
 )
 
 from .groq_utils import (
@@ -41,20 +41,20 @@ from .groq_utils import (
     convert_chat_completion_response_stream,
 )
 
-_MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+_MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -62,7 +62,7 @@ _MODEL_ALIASES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
@@ -73,7 +73,7 @@ class GroqInferenceAdapter(Inference, ModelRegistryHelper, NeedsRequestProviderD
     _config: GroqConfig
 
     def __init__(self, config: GroqConfig):
-        ModelRegistryHelper.__init__(self, model_aliases=_MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
         self._config = config
 
     def completion(
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index 6a359e009..c432861ee 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-_MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+_MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama3-70b-instruct",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta/llama-3.2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index bcd29a0df..824389577 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -33,7 +33,7 @@ from llama_stack.providers.utils.inference.model_registry import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
-from .models import _MODEL_ALIASES
+from .models import _MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -50,7 +50,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_aliases=_MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
 
         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
 
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 287f025e0..e16c02003 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
-    build_model_alias,
+    build_hf_repo_model_entry,
+    build_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -58,74 +58,74 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 log = logging.getLogger(__name__)
 
-model_aliases = [
-    build_hf_repo_model_alias(
+model_entries = [
+    build_hf_repo_model_entry(
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.1:70b-instruct-fp16",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:70b",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.1:405b-instruct-fp16",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.1:405b",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2:1b-instruct-fp16",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2:1b",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2:3b-instruct-fp16",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2:3b",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2-vision:11b-instruct-fp16",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2-vision:latest",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.2-vision:90b-instruct-fp16",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_model_entry(
         "llama3.2-vision:90b",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama3.3:70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
     # The Llama Guard models don't have their full fp16 versions
     # so we are going to alias their default version to the canonical SKU
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-guard3:8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "llama-guard3:1b",
         CoreModelId.llama_guard_3_1b.value,
     ),
@@ -134,7 +134,7 @@ model_aliases = [
 
 class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, url: str) -> None:
-        self.register_helper = ModelRegistryHelper(model_aliases)
+        self.register_helper = ModelRegistryHelper(model_entries)
         self.url = url
 
     @property
diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py
index 1e002c81d..2231be22d 100644
--- a/llama_stack/providers/remote/inference/sambanova/models.py
+++ b/llama_stack/providers/remote/inference/sambanova/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-8B-Instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-70B-Instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.1-405B-Instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.2-1B-Instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.2-3B-Instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-3.3-70B-Instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Llama-3.2-11B-Vision-Instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Llama-3.2-90B-Vision-Instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py
index 30a0934a3..c05284d7d 100644
--- a/llama_stack/providers/remote/inference/sambanova/sambanova.py
+++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -31,12 +31,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import SambaNovaImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class SambaNovaInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: SambaNovaImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, model_aliases=MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index cd2311a48..1a50e3b61 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
 
 log = logging.getLogger(__name__)
 
-def build_hf_repo_model_aliases():
+def build_hf_repo_model_entries():
     return [
-        build_hf_repo_model_alias(
+        build_hf_repo_model_entry(
             model.huggingface_repo,
             model.descriptor(),
         )
@@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
     model_id: str
 
     def __init__(self) -> None:
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.huggingface_repo_to_llama_model_id = {
             model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
         }
diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py
index 87904c47b..90fb60508 100644
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 
-MODEL_ALIASES = [
-    build_hf_repo_model_alias(
+MODEL_ENTRIES = [
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-3B-Instruct-Turbo",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-3.3-70B-Instruct-Turbo",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_hf_repo_model_alias(
+    build_hf_repo_model_entry(
         "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 75428e70a..8afd3e85b 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -46,12 +46,12 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import TogetherImplConfig
-from .models import MODEL_ALIASES
+from .models import MODEL_ENTRIES
 
 
 class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProviderData):
     def __init__(self, config: TogetherImplConfig) -> None:
-        ModelRegistryHelper.__init__(self, MODEL_ALIASES)
+        ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self.config = config
 
     async def initialize(self) -> None:
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 75dc432e4..4e5de3933 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_hf_repo_model_alias,
+    build_hf_repo_model_entry,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionResponse,
@@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
 
 log = logging.getLogger(__name__)
 
-def build_hf_repo_model_aliases():
+def build_hf_repo_model_entries():
     return [
-        build_hf_repo_model_alias(
+        build_hf_repo_model_entry(
             model.huggingface_repo,
             model.descriptor(),
         )
@@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
 
 class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
-        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
         self.config = config
         self.client = None
 
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index e14a733d1..288f27449 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -18,7 +18,7 @@ from llama_stack.providers.utils.inference import (
 
 # TODO: this class is more confusing than useful right now. We need to make it
 # more closer to the Model class.
-class ModelAlias(BaseModel):
+class ProviderModelEntry(BaseModel):
     provider_model_id: str
     aliases: List[str] = Field(default_factory=list)
     llama_model: Optional[str] = None
@@ -32,8 +32,8 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
     return None
 
 
-def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
-    return ModelAlias(
+def build_hf_repo_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry:
+    return ProviderModelEntry(
         provider_model_id=provider_model_id,
         aliases=[
             get_huggingface_repo(model_descriptor),
@@ -42,8 +42,8 @@ def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) ->
     )
 
 
-def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
-    return ModelAlias(
+def build_model_entry(provider_model_id: str, model_descriptor: str) -> ProviderModelEntry:
+    return ProviderModelEntry(
         provider_model_id=provider_model_id,
         aliases=[],
         llama_model=model_descriptor,
@@ -51,10 +51,10 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
 
 
 class ModelRegistryHelper(ModelsProtocolPrivate):
-    def __init__(self, model_aliases: List[ModelAlias]):
+    def __init__(self, model_entries: List[ProviderModelEntry]):
         self.alias_to_provider_id_map = {}
         self.provider_id_to_llama_model_map = {}
-        for alias_obj in model_aliases:
+        for alias_obj in model_entries:
             for alias in alias_obj.aliases:
                 self.alias_to_provider_id_map[alias] = alias_obj.provider_model_id
             # also add a mapping from provider model id to itself for easy lookup
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 550269f61..628e78612 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -10,7 +10,7 @@ from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
-from llama_stack.providers.remote.inference.bedrock.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -47,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="bedrock",
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
     default_tool_groups = [
         ToolGroupInput(
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 5f3921102..c467579ac 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -14,7 +14,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_aliases
+from llama_stack.providers.remote.inference.cerebras.models import model_entries
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="cerebras",
         )
-        for m in model_aliases
+        for m in model_entries
     ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 8d91c223d..5cde01e81 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
-from llama_stack.providers.remote.inference.fireworks.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -67,7 +67,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="fireworks",
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index 6bca48e99..a505a1b93 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -9,7 +9,7 @@ from pathlib import Path
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ALIASES
+from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -45,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="nvidia",
         )
-        for m in _MODEL_ALIASES
+        for m in _MODEL_ENTRIES
     ]
     default_tool_groups = [
         ToolGroupInput(
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index 29efe39c3..1d95e4b65 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -119,7 +119,7 @@ llama stack run ./run-with-safety.yaml \
 ### (Optional) Update Model Serving Configuration
 
 ```{note}
-Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
+Please check the [model_entries](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
 ```
 
 To serve a new model with `ollama`
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index dac7346a7..725c6abc4 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
-from llama_stack.providers.remote.inference.sambanova.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -47,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id=name,
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
 
     default_tool_groups = [
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index ef6847fb2..d46dd9d27 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -19,7 +19,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
-from llama_stack.providers.remote.inference.together.models import MODEL_ALIASES
+from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
@@ -65,7 +65,7 @@ def get_distribution_template() -> DistributionTemplate:
             provider_model_id=m.provider_model_id,
             provider_id="together",
         )
-        for m in MODEL_ALIASES
+        for m in MODEL_ENTRIES
     ]
     default_tool_groups = [
         ToolGroupInput(
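Usage note: the sketch below illustrates the renamed API after this patch, using only the helpers and signatures visible in the diff (build_hf_repo_model_entry, build_model_entry, ModelRegistryHelper). The specific entries shown are copied from the Ollama provider above purely as an example; any other provider id or model list here would be an assumption.

# Minimal sketch of the post-rename registration flow (not part of the patch).
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
    build_hf_repo_model_entry,
    build_model_entry,
)

# Each ProviderModelEntry maps a provider-specific model id to a canonical Llama model.
# build_hf_repo_model_entry also records the HuggingFace repo name as an alias;
# build_model_entry registers only the provider id itself.
_MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "llama3.1:8b-instruct-fp16",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
    build_model_entry(
        "llama3.1:8b",
        CoreModelId.llama3_1_8b_instruct.value,
    ),
]

# Providers hand their entries to ModelRegistryHelper, which builds the
# alias -> provider_model_id and provider_model_id -> llama_model lookup maps.
helper = ModelRegistryHelper(model_entries=_MODEL_ENTRIES)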