Mirror of https://github.com/meta-llama/llama-stack.git
chore: turn OpenAIMixin into a pydantic.BaseModel
- implement get_api_key instead of relying on LiteLLMOpenAIMixin.get_api_key
- remove use of LiteLLMOpenAIMixin
- add default initialize/shutdown methods to OpenAIMixin
- remove __init__s to allow proper pydantic construction
- remove dead code from vllm adapter and associated / duplicate unit tests
- update vllm adapter to use OpenAIMixin for model registration
- remove ModelRegistryHelper from fireworks & together adapters
- remove Inference from nvidia adapter
- complete type hints on embedding_model_metadata
- allow extra fields on OpenAIMixin, for model_store, __provider_id__, etc.
- new recordings for ollama
parent: ce77c27ff8
commit: 60f0056cbc

57 changed files with 12520 additions and 1254 deletions
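For orientation, below is a minimal sketch of the base-class shape these changes imply. It is an assumption-laden outline, not the actual class body: the real OpenAIMixin in llama_stack.providers.utils.inference.openai_mixin carries much more, and everything here beyond the method names visible in the diff is hypothetical.

    # Hypothetical sketch only; field and method bodies are assumptions.
    from abc import abstractmethod

    from pydantic import BaseModel, ConfigDict


    class OpenAIMixin(BaseModel):
        # extra="allow" permits undeclared runtime attributes such as
        # model_store to be attached to instances (see commit message)
        model_config = ConfigDict(extra="allow")

        @abstractmethod
        def get_api_key(self) -> str:
            """Each adapter now implements its own key lookup."""
            ...

        @abstractmethod
        def get_base_url(self) -> str:
            """OpenAI-compatible endpoint for the provider."""
            ...

        async def initialize(self) -> None:
            # default no-op, so adapters only override when they need setup
            pass

        async def shutdown(self) -> None:
            # default no-op, mirroring initialize
            pass

Pydantic's metaclass derives from ABCMeta, so the abstract methods above do enforce that concrete adapters supply get_api_key and get_base_url.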
llama_stack/providers/remote/inference/llama_openai_compat/__init__.py

@@ -4,14 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference import InferenceProvider
-
 from .config import LlamaCompatConfig
 
 
-async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
+async def get_adapter_impl(config: LlamaCompatConfig, _deps):
     # import dynamically so the import is used only when it is needed
     from .llama import LlamaCompatInferenceAdapter
 
-    adapter = LlamaCompatInferenceAdapter(config)
+    adapter = LlamaCompatInferenceAdapter(config=config)
     return adapter
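Once the hand-written __init__ is gone, pydantic's generated __init__ takes over, and that constructor is keyword-only; this is why the call site above changes to config=config. A minimal illustration with toy classes (not from the repo):

    from pydantic import BaseModel


    class Config(BaseModel):
        api_key: str | None = None


    class Adapter(BaseModel):
        config: Config


    # pydantic generates __init__ from the declared fields; args are keyword-only
    adapter = Adapter(config=Config(api_key="test"))

    # Adapter(Config())  # TypeError: positional arguments are not accepted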
llama_stack/providers/remote/inference/llama_openai_compat/llama.py

@@ -5,38 +5,21 @@
 # the root directory of this source tree.
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
 
 
-class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+class LlamaCompatInferenceAdapter(OpenAIMixin):
+    config: LlamaCompatConfig
+
+    provider_data_api_key_field: str = "llama_api_key"
     """
     Llama API Inference Adapter for Llama Stack.
-
-    Note: The inheritance order is important here. OpenAIMixin must come before
-    LiteLLMOpenAIMixin to ensure that OpenAIMixin.check_model_availability()
-    is used instead of ModelRegistryHelper.check_model_availability().
-
-    - OpenAIMixin.check_model_availability() queries the Llama API to check if a model exists
-    - ModelRegistryHelper.check_model_availability() (inherited by LiteLLMOpenAIMixin) just returns False and shows a warning
     """
 
-    _config: LlamaCompatConfig
-
-    def __init__(self, config: LlamaCompatConfig):
-        LiteLLMOpenAIMixin.__init__(
-            self,
-            litellm_provider_name="meta_llama",
-            api_key_from_config=config.api_key,
-            provider_data_api_key_field="llama_api_key",
-            openai_compat_api_base=config.openai_compat_api_base,
-        )
-        self.config = config
-
-    # Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
-    get_api_key = LiteLLMOpenAIMixin.get_api_key
+    def get_api_key(self) -> str:
+        return self.config.api_key or ""
 
     def get_base_url(self) -> str:
         """
@@ -45,9 +28,3 @@ class LlamaCompatInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
         :return: The Llama API base URL
         """
         return self.config.openai_compat_api_base
-
-    async def initialize(self):
-        await super().initialize()
-
-    async def shutdown(self):
-        await super().shutdown()
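With the adapter's __init__ and its initialize/shutdown overrides removed, construction and lifecycle now come entirely from the pydantic base. A short usage sketch, assuming only the config fields visible in the diff (api_key, openai_compat_api_base); the literal values are illustrative, not real endpoints or keys:

    import asyncio

    from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
    from llama_stack.providers.remote.inference.llama_openai_compat.llama import LlamaCompatInferenceAdapter

    config = LlamaCompatConfig(
        api_key="my-llama-api-key",                    # illustrative value
        openai_compat_api_base="https://example.invalid/v1",  # illustrative value
    )
    adapter = LlamaCompatInferenceAdapter(config=config)

    print(adapter.get_api_key())   # "my-llama-api-key"
    print(adapter.get_base_url())  # the configured OpenAI-compatible base URL
    asyncio.run(adapter.initialize())  # no-op default now provided by OpenAIMixin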