Inference to use provider resource id to register and validate (#428)

This PR changes the way model id gets translated to the final model name that gets passed through the provider. Major changes include: 1) Providers are responsible for registering an object and as part of the registration returning the object with the correct provider specific name of the model provider_resource_id 2) To help with the common look ups different names a new ModelLookup class is created. Tested all inference providers including together, fireworks, vllm, ollama, meta reference and bedrock
2024-11-12 20:02:00 -08:00 · 2024-11-12 20:02:00 -08:00 · fdff24e77a
commit fdff24e77a
parent e51107e019
21 changed files with 460 additions and 290 deletions
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@ -6,6 +6,8 @@

 from typing import AsyncGenerator

+from llama_models.datatypes import CoreModelId
+
 from llama_models.llama3.api.chat_format import ChatFormat

 from llama_models.llama3.api.datatypes import Message
@ -15,7 +17,10 @@ from openai import OpenAI

 from llama_stack.apis.inference import *  # noqa: F403

-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
+from llama_stack.providers.utils.inference.model_registry import (
+    build_model_alias,
+    ModelRegistryHelper,
+)
 from llama_stack.providers.utils.inference.openai_compat import (
    get_sampling_options,
    process_chat_completion_response,
@ -28,16 +33,23 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from .config import DatabricksImplConfig


-DATABRICKS_SUPPORTED_MODELS = {
-    "Llama3.1-70B-Instruct": "databricks-meta-llama-3-1-70b-instruct",
-    "Llama3.1-405B-Instruct": "databricks-meta-llama-3-1-405b-instruct",
-}
+model_aliases = [
+    build_model_alias(
+        "databricks-meta-llama-3-1-70b-instruct",
+        CoreModelId.llama3_1_70b_instruct.value,
+    ),
+    build_model_alias(
+        "databricks-meta-llama-3-1-405b-instruct",
+        CoreModelId.llama3_1_405b_instruct.value,
+    ),
+]


 class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):
    def __init__(self, config: DatabricksImplConfig) -> None:
        ModelRegistryHelper.__init__(
-            self, stack_to_provider_models_map=DATABRICKS_SUPPORTED_MODELS
+            self,
+            model_aliases=model_aliases,
        )
        self.config = config
        self.formatter = ChatFormat(Tokenizer.get_instance())
@ -113,8 +125,10 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):

    def _get_params(self, request: ChatCompletionRequest) -> dict:
        return {
-            "model": self.map_to_provider_model(request.model),
-            "prompt": chat_completion_request_to_prompt(request, self.formatter),
+            "model": request.model,
+            "prompt": chat_completion_request_to_prompt(
+                request, self.get_llama_model(request.model), self.formatter
+            ),
            "stream": request.stream,
            **get_sampling_options(request.sampling_params),
        }