diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 1e668b183..844cf6939 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -11,9 +11,11 @@ from typing import AsyncGenerator, List
 from llama_models.sku_list import resolve_model
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.inference import *  # noqa: F403
-from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 
+from llama_stack.providers.utils.inference.model_registry import build_model_alias
+from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.providers.datatypes import ModelsProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_image_media_to_url,
     request_has_media,
@@ -28,10 +30,19 @@ from .model_parallel import LlamaModelParallelGenerator
 SEMAPHORE = asyncio.Semaphore(1)
 
 
-class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate):
+class MetaReferenceInferenceImpl(Inference, ModelRegistryHelper, ModelsProtocolPrivate):
     def __init__(self, config: MetaReferenceInferenceConfig) -> None:
         self.config = config
         model = resolve_model(config.model)
+        ModelRegistryHelper.__init__(
+            self,
+            [
+                build_model_alias(
+                    model.descriptor(),
+                    model.core_model_id,
+                )
+            ],
+        )
         if model is None:
             raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`")
         self.model = model
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 47abff689..8762a6c95 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -13,7 +13,7 @@ from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.tokenizer import Tokenizer
 
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 
@@ -24,20 +24,17 @@ from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 
 
 model_aliases = [
-    ModelAlias(
-        provider_model_id="meta.llama3-1-8b-instruct-v1:0",
-        aliases=["Llama3.1-8B-Instruct"],
-        llama_model=CoreModelId.llama3_1_8b_instruct,
+    build_model_alias(
+        "meta.llama3-1-8b-instruct-v1:0",
+        CoreModelId.llama3_1_8b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta.llama3-1-70b-instruct-v1:0",
-        aliases=["Llama3.1-70B-Instruct"],
-        llama_model=CoreModelId.llama3_1_70b_instruct,
+    build_model_alias(
+        "meta.llama3-1-70b-instruct-v1:0",
+        CoreModelId.llama3_1_70b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta.llama3-1-405b-instruct-v1:0",
-        aliases=["Llama3.1-405B-Instruct"],
-        llama_model=CoreModelId.llama3_1_405b_instruct,
+    build_model_alias(
+        "meta.llama3-1-405b-instruct-v1:0",
+        CoreModelId.llama3_1_405b_instruct,
     ),
 ]
 
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index fedea0f86..1337b6c09 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -18,7 +18,7 @@ from openai import OpenAI
 from llama_stack.apis.inference import *  # noqa: F403
 
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
@@ -34,15 +34,13 @@ from .config import DatabricksImplConfig
 
 
 model_aliases = [
-    ModelAlias(
-        provider_model_id="databricks-meta-llama-3-1-70b-instruct",
-        aliases=["Llama3.1-70B-Instruct"],
-        llama_model=CoreModelId.llama3_1_70b_instruct.value,
+    build_model_alias(
+        "databricks-meta-llama-3-1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="databricks-meta-llama-3-1-405b-instruct",
-        aliases=["Llama3.1-405B-Instruct"],
-        llama_model=CoreModelId.llama3_1_405b_instruct.value,
+    build_model_alias(
+        "databricks-meta-llama-3-1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct,
     ),
 ]
 
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index ce9639cbd..e0d42c721 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -15,7 +15,7 @@ from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
@@ -36,50 +36,41 @@ from .config import FireworksImplConfig
 
 
 model_aliases = [
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p1-8b-instruct",
-        aliases=["Llama3.1-8B-Instruct"],
-        llama_model=CoreModelId.llama3_1_8b_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p1-8b-instruct",
+        CoreModelId.llama3_1_8b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p1-70b-instruct",
-        aliases=["Llama3.1-70B-Instruct"],
-        llama_model=CoreModelId.llama3_1_70b_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p1-405b-instruct",
-        aliases=["Llama3.1-405B-Instruct"],
-        llama_model=CoreModelId.llama3_1_405b_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p2-1b-instruct",
-        aliases=["Llama3.2-1B-Instruct"],
-        llama_model=CoreModelId.llama3_2_3b_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p2-1b-instruct",
+        CoreModelId.llama3_2_3b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p2-3b-instruct",
-        aliases=["Llama3.2-3B-Instruct"],
-        llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p2-3b-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p2-11b-vision-instruct",
-        aliases=["Llama3.2-11B-Vision-Instruct"],
-        llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p2-11b-vision-instruct",
+        CoreModelId.llama3_2_11b_vision_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-v3p2-90b-vision-instruct",
-        aliases=["Llama3.2-90B-Vision-Instruct"],
-        llama_model=CoreModelId.llama3_2_90b_vision_instruct.value,
+    build_model_alias(
+        "fireworks/llama-v3p2-90b-vision-instruct",
+        CoreModelId.llama3_2_90b_vision_instruct,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-guard-3-8b",
-        aliases=["Llama-Guard-3-8B"],
-        llama_model=CoreModelId.llama_guard_3_8b.value,
+    build_model_alias(
+        "fireworks/llama-guard-3-8b",
+        CoreModelId.llama_guard_3_8b,
     ),
-    ModelAlias(
-        provider_model_id="fireworks/llama-guard-3-11b-vision",
-        aliases=["Llama-Guard-3-11B-Vision"],
-        llama_model=CoreModelId.llama_guard_3_11b_vision.value,
+    build_model_alias(
+        "fireworks/llama-guard-3-11b-vision",
+        CoreModelId.llama_guard_3_11b_vision,
     ),
 ]
 
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 4a7f548a6..34af95b50 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -15,7 +15,7 @@ from llama_models.llama3.api.tokenizer import Tokenizer
 from ollama import AsyncClient
 
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 
@@ -40,40 +40,33 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 
 
 model_aliases = [
-    ModelAlias(
-        provider_model_id="llama3.1:8b-instruct-fp16",
-        aliases=["Llama3.1-8B-Instruct"],
-        llama_model=CoreModelId.llama3_1_8b_instruct.value,
+    build_model_alias(
+        "llama3.1:8b-instruct-fp16",
+        CoreModelId.llama3_1_8b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="llama3.1:70b-instruct-fp16",
-        aliases=["Llama3.1-70B-Instruct"],
-        llama_model=CoreModelId.llama3_1_70b_instruct.value,
+    build_model_alias(
+        "llama3.1:70b-instruct-fp16",
+        CoreModelId.llama3_1_70b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="llama3.2:1b-instruct-fp16",
-        aliases=["Llama3.2-1B-Instruct"],
-        llama_model=CoreModelId.llama3_2_1b_instruct.value,
+    build_model_alias(
+        "llama3.2:1b-instruct-fp16",
+        CoreModelId.llama3_2_1b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="llama3.2:3b-instruct-fp16",
-        aliases=["Llama3.2-3B-Instruct"],
-        llama_model=CoreModelId.llama3_2_3b_instruct.value,
+    build_model_alias(
+        "llama3.2:3b-instruct-fp16",
+        CoreModelId.llama3_2_3b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="llama-guard3:8b",
-        aliases=["Llama-Guard-3-8B"],
-        llama_model=CoreModelId.llama_guard_3_8b.value,
+    build_model_alias(
+        "llama-guard3:8b",
+        CoreModelId.llama_guard_3_8b,
     ),
-    ModelAlias(
-        provider_model_id="llama-guard3:1b",
-        aliases=["Llama-Guard-3-1B"],
-        llama_model=CoreModelId.llama_guard_3_1b.value,
+    build_model_alias(
+        "llama-guard3:1b",
+        CoreModelId.llama_guard_3_1b,
     ),
-    ModelAlias(
-        provider_model_id="x/llama3.2-vision:11b-instruct-fp16",
-        aliases=["Llama3.2-11B-Vision-Instruct"],
-        llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
+    build_model_alias(
+        "x/llama3.2-vision:11b-instruct-fp16",
+        CoreModelId.llama3_2_11b_vision_instruct,
     ),
 ]
 
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 75f93f64f..644302a0f 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -18,7 +18,7 @@ from together import Together
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
@@ -39,45 +39,37 @@ from .config import TogetherImplConfig
 
 
 model_aliases = [
-    ModelAlias(
-        provider_model_id="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-        aliases=["Llama3.1-8B-Instruct"],
-        llama_model=CoreModelId.llama3_1_8b_instruct.value,
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+        CoreModelId.llama3_1_8b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        aliases=["Llama3.1-70B-Instruct"],
-        llama_model=CoreModelId.llama3_1_70b_instruct.value,
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+        CoreModelId.llama3_1_70b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-        aliases=["Llama3.1-405B-Instruct"],
-        llama_model=CoreModelId.llama3_1_405b_instruct.value,
+    build_model_alias(
+        "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+        CoreModelId.llama3_1_405b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Llama-3.2-3B-Instruct-Turbo",
-        aliases=["Llama3.2-3B-Instruct"],
-        llama_model=CoreModelId.llama3_2_3b_instruct.value,
+    build_model_alias(
+        "meta-llama/Llama-3.2-3B-Instruct-Turbo",
+        CoreModelId.llama3_2_3b_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        aliases=["Llama3.2-11B-Vision-Instruct"],
-        llama_model=CoreModelId.llama3_2_11b_vision_instruct.value,
+    build_model_alias(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_11b_vision_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-        aliases=["Llama3.2-90B-Vision-Instruct"],
-        llama_model=CoreModelId.llama3_2_90b_vision_instruct.value,
+    build_model_alias(
+        "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
+        CoreModelId.llama3_2_90b_vision_instruct,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Meta-Llama-Guard-3-8B",
-        aliases=["Llama-Guard-3-8B"],
-        llama_model=CoreModelId.llama_guard_3_8b.value,
+    build_model_alias(
+        "meta-llama/Meta-Llama-Guard-3-8B",
+        CoreModelId.llama_guard_3_8b,
     ),
-    ModelAlias(
-        provider_model_id="meta-llama/Llama-Guard-3-11B-Vision-Turbo",
-        aliases=["Llama-Guard-3-11B-Vision"],
-        llama_model=CoreModelId.llama_guard_3_11b_vision.value,
+    build_model_alias(
+        "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
+        CoreModelId.llama_guard_3_11b_vision,
     ),
 ]
 
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index c49541fd9..9bf25c5ad 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -16,7 +16,7 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 
 from llama_stack.providers.utils.inference.model_registry import (
-    ModelAlias,
+    build_model_alias,
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
@@ -36,10 +36,9 @@ from .config import VLLMInferenceAdapterConfig
 
 def build_model_aliases():
     return [
-        ModelAlias(
-            provider_model_id=model.huggingface_repo,
-            aliases=[model.descriptor()],
-            llama_model=model.descriptor(),
+        build_model_alias(
+            model.huggingface_repo,
+            model.core_model_id,
         )
         for model in all_registered_models()
         if model.huggingface_repo
@@ -55,11 +54,6 @@ class VLLMInferenceAdapter(Inference, ModelRegistryHelper, ModelsProtocolPrivate
         self.config = config
         self.formatter = ChatFormat(Tokenizer.get_instance())
         self.client = None
-        self.huggingface_repo_to_llama_model_id = {
-            model.huggingface_repo: model.descriptor()
-            for model in all_registered_models()
-            if model.huggingface_repo
-        }
 
     async def initialize(self) -> None:
         self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index b3401d8f5..35d67a4cc 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -5,13 +5,35 @@
 # the root directory of this source tree.
 
 from collections import namedtuple
-from typing import List
+from typing import List, Optional
+
+from llama_models.datatypes import CoreModelId
+from llama_models.sku_list import all_registered_models
 
 from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
 
 ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
 
 
+def get_huggingface_repo(core_model_id: CoreModelId) -> Optional[str]:
+    """Get the Hugging Face repository for a given CoreModelId."""
+    for model in all_registered_models():
+        if model.core_model_id == core_model_id:
+            return model.huggingface_repo
+    return None
+
+
+def build_model_alias(provider_model_id: str, core_model_id: CoreModelId) -> ModelAlias:
+    return ModelAlias(
+        provider_model_id=provider_model_id,
+        aliases=[
+            core_model_id.value,
+            get_huggingface_repo(core_model_id),
+        ],
+        llama_model=core_model_id.value,
+    )
+
+
 class ModelLookup:
     def __init__(
         self,
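
A minimal usage sketch of the new build_model_alias helper, illustrative only and not part of the diff above; the exact Hugging Face repo string assumes the mapping shipped in llama_models' sku_list:

from llama_models.datatypes import CoreModelId

from llama_stack.providers.utils.inference.model_registry import build_model_alias

# build_model_alias derives the alias list from the CoreModelId instead of
# hand-written alias strings: the canonical descriptor plus, when available,
# the Hugging Face repo looked up via get_huggingface_repo().
alias = build_model_alias(
    "fireworks/llama-v3p1-8b-instruct",
    CoreModelId.llama3_1_8b_instruct,
)
# alias.provider_model_id -> "fireworks/llama-v3p1-8b-instruct"
# alias.aliases           -> ["Llama3.1-8B-Instruct", "meta-llama/Llama-3.1-8B-Instruct"]  (repo value assumed)
# alias.llama_model       -> "Llama3.1-8B-Instruct"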