diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index c79f97def..dfd27d408 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
 )
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
     augment_content_with_response_format_prompt,
@@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
 
         self.model_registry_helper = ModelRegistryHelper(
             [
-                build_model_alias(
+                build_hf_repo_model_alias(
                     llama_model.descriptor(),
                     llama_model.core_model_id.value,
                 )
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py
index b629e05d5..4c5248619 100644
--- a/llama_stack/providers/remote/inference/bedrock/models.py
+++ b/llama_stack/providers/remote/inference/bedrock/models.py
@@ -6,19 +6,19 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta.llama3-1-8b-instruct-v1:0",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta.llama3-1-70b-instruct-v1:0",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta.llama3-1-405b-instruct-v1:0",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index 03ffeb492..53b0d5b55 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -6,15 +6,15 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 model_aliases = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama-3.3-70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index 05e61361c..03da4d129 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
@@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import DatabricksImplConfig
 
 model_aliases = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "databricks-meta-llama-3-1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "databricks-meta-llama-3-1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py
index 14de585d4..8ba67c9ff 100644
--- a/llama_stack/providers/remote/inference/fireworks/models.py
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -6,47 +6,47 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-v3p3-70b-instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-guard-3-8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "accounts/fireworks/models/llama-guard-3-11b-vision",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index 441b6af5c..12ee613fe 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
+    build_hf_repo_model_alias,
     build_model_alias,
-    build_model_alias_with_just_provider_model_id,
 )
 
 from .groq_utils import (
@@ -42,19 +42,19 @@ from .groq_utils import (
 )
 
 _MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -62,7 +62,7 @@ _MODEL_ALIASES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index 1d9b575d4..6a359e009 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 _MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama3-70b-instruct",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.1-8b-instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.1-70b-instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.2-1b-instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.2-11b-vision-instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta/llama-3.2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 2488d9071..287f025e0 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
+    build_hf_repo_model_alias,
     build_model_alias,
-    build_model_alias_with_just_provider_model_id,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 log = logging.getLogger(__name__)
 
 model_aliases = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.1:8b-instruct-fp16",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.1:8b",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.1:70b-instruct-fp16",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.1:70b",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.1:405b-instruct-fp16",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.1:405b",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.2:1b-instruct-fp16",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.2:1b",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.2:3b-instruct-fp16",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.2:3b",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.2-vision:11b-instruct-fp16",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.2-vision:latest",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.2-vision:90b-instruct-fp16",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias_with_just_provider_model_id(
+    build_model_alias(
         "llama3.2-vision:90b",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama3.3:70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
     # The Llama Guard models don't have their full fp16 versions
     # so we are going to alias their default version to the canonical SKU
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama-guard3:8b",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "llama-guard3:1b",
         CoreModelId.llama_guard_3_1b.value,
     ),
diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py
index 27a4a149e..1e002c81d 100644
--- a/llama_stack/providers/remote/inference/sambanova/models.py
+++ b/llama_stack/providers/remote/inference/sambanova/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.1-8B-Instruct",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.1-70B-Instruct",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.1-405B-Instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.2-1B-Instruct",
         CoreModelId.llama3_2_1b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.2-3B-Instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-3.3-70B-Instruct",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Llama-3.2-11B-Vision-Instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Llama-3.2-90B-Vision-Instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 7ffeced95..cd2311a48 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
 
 log = logging.getLogger(__name__)
 
-def build_model_aliases():
+def build_hf_repo_model_aliases():
     return [
-        build_model_alias(
+        build_hf_repo_model_alias(
             model.huggingface_repo,
             model.descriptor(),
         )
@@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
     model_id: str
 
     def __init__(self) -> None:
-        self.register_helper = ModelRegistryHelper(build_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
         self.huggingface_repo_to_llama_model_id = {
             model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
         }
diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py
index 87d282ea5..87904c47b 100644
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -6,43 +6,43 @@
 
 from llama_stack.models.llama.datatypes import CoreModelId
 from llama_stack.providers.utils.inference.model_registry import (
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 
 MODEL_ALIASES = [
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
         CoreModelId.llama3_1_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Llama-3.2-3B-Instruct-Turbo",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Llama-3.3-70B-Instruct-Turbo",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Meta-Llama-Guard-3-8B",
         CoreModelId.llama_guard_3_8b.value,
     ),
-    build_model_alias(
+    build_hf_repo_model_alias(
         "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
         CoreModelId.llama_guard_3_11b_vision.value,
     ),
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 220bf4bde..75dc432e4 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
-    build_model_alias,
+    build_hf_repo_model_alias,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionResponse,
@@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
 
 log = logging.getLogger(__name__)
 
-def build_model_aliases():
+def build_hf_repo_model_aliases():
     return [
-        build_model_alias(
+        build_hf_repo_model_alias(
             model.huggingface_repo,
             model.descriptor(),
         )
@@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
 
 class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
-        self.register_helper = ModelRegistryHelper(build_model_aliases())
+        self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
         self.config = config
         self.client = None
 
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index 5cb785843..e14a733d1 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -32,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
     return None
 
 
-def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
+def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
     return ModelAlias(
         provider_model_id=provider_model_id,
         aliases=[
@@ -42,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
     )
 
 
-def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias:
+def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
    return ModelAlias(
        provider_model_id=provider_model_id,
        aliases=[],
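
Reviewer note: the final model_registry.py hunks carry the semantics of the rename, but the diff truncates both builders' bodies. Below is a minimal Python sketch of the intended difference. The `ModelAlias` field layout (including the `llama_model` field), the `get_huggingface_repo` mapping, and the exact alias-list contents are assumptions for illustration, not the real definitions:

```python
from dataclasses import dataclass, field
from typing import List, Optional


# Hypothetical stand-ins for the real definitions in model_registry.py.
@dataclass
class ModelAlias:
    provider_model_id: str
    aliases: List[str] = field(default_factory=list)
    llama_model: Optional[str] = None  # assumed third field; truncated in the hunk


def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
    # Stand-in mapping; the real helper derives this from the Llama SKU list.
    return {"Llama3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct"}.get(model_descriptor)


def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
    # New name reflects the behavior: the model's Hugging Face repo name is
    # also registered as an alias, so clients can address the model by it.
    repo = get_huggingface_repo(model_descriptor)
    return ModelAlias(
        provider_model_id=provider_model_id,
        aliases=[repo] if repo else [],
        llama_model=model_descriptor,
    )


def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
    # Formerly build_model_alias_with_just_provider_model_id: the provider's
    # own id is the only way to address the model; no extra aliases.
    return ModelAlias(
        provider_model_id=provider_model_id,
        aliases=[],
        llama_model=model_descriptor,
    )


# Usage mirroring the ollama hunk: the fp16 SKU is reachable via its HF repo
# name, while the quantized default tag is reachable only by its provider id.
fp16 = build_hf_repo_model_alias("llama3.1:8b-instruct-fp16", "Llama3.1-8B-Instruct")
default = build_model_alias("llama3.1:8b", "Llama3.1-8B-Instruct")
assert "meta-llama/Llama-3.1-8B-Instruct" in fp16.aliases
assert default.aliases == []
```

This reading also explains the swap in groq.py and ollama.py: call sites that previously used `build_model_alias` for HF-repo-backed SKUs move to `build_hf_repo_model_alias`, while the former `build_model_alias_with_just_provider_model_id` call sites take over the shorter `build_model_alias` name.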