inference registry updates

2025-12-08 19:10:56 +00:00 · 2024-10-05 22:25:48 -07:00 · 2024-10-05 22:25:48 -07:00 · 59302a86df
commit 59302a86df
parent 4215cc9331
12 changed files with 570 additions and 535 deletions
--- a/llama_stack/providers/adapters/inference/ollama/ollama.py
+++ b/llama_stack/providers/adapters/inference/ollama/ollama.py
@ -18,7 +18,7 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.utils.inference.augment_messages import (
    augment_messages_for_tools,
 )
-from llama_stack.providers.utils.inference.routable import RoutableProviderForModels
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

 # TODO: Eventually this will move to the llama cli model list command
 # mapping of Model SKUs to ollama models
@ -27,12 +27,13 @@ OLLAMA_SUPPORTED_SKUS = {
    "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16",
    "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16",
    "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16",
+    "Llama-Guard-3-8B": "xe/llamaguard3:latest",
 }


-class OllamaInferenceAdapter(Inference, RoutableProviderForModels):
+class OllamaInferenceAdapter(ModelRegistryHelper, Inference):
    def __init__(self, url: str) -> None:
-        RoutableProviderForModels.__init__(
+        ModelRegistryHelper.__init__(
            self, stack_to_provider_models_map=OLLAMA_SUPPORTED_SKUS
        )
        self.url = url