chore: create OpenAIMixin for inference providers with an OpenAI-compat API that need to implement openai_* methods (#2835)

# What does this PR do?

Add an `OpenAIMixin` for use by inference providers whose remote endpoints support an OpenAI-compatible API.

Its use is demonstrated by refactoring:
- OpenAIInferenceAdapter
- NVIDIAInferenceAdapter (adds embedding support)
- LlamaCompatInferenceAdapter

## Test Plan

Existing unit and integration tests.
2025-10-04 04:04:14 +00:00 · 2025-07-23 06:49:40 -04:00 · 2025-07-23 06:49:40 -04:00 · e1ed152779
commit e1ed152779
parent fc67ad408a
7 changed files with 402 additions and 387 deletions
--- a/tests/unit/providers/inference/test_inference_client_caching.py
+++ b/tests/unit/providers/inference/test_inference_client_caching.py
@@ -10,6 +10,8 @@ from unittest.mock import MagicMock
 from llama_stack.distribution.request_headers import request_provider_data_context
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.remote.inference.llama_openai_compat.llama import LlamaCompatInferenceAdapter
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
 from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
@@ -50,7 +52,7 @@ def test_openai_provider_openai_client_caching():
        with request_provider_data_context(
            {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})}
        ):
-            openai_client = inference_adapter._get_openai_client()
+            openai_client = inference_adapter.client
            assert openai_client.api_key == api_key


@@ -71,3 +73,18 @@ def test_together_provider_openai_client_caching():
            assert together_client.client.api_key == api_key
            openai_client = inference_adapter._get_openai_client()
            assert openai_client.api_key == api_key
+
+
+def test_llama_compat_provider_openai_client_caching():
+    """Ensure the LlamaCompat provider does not cache api keys across client requests"""
+    config = LlamaCompatConfig()
+    inference_adapter = LlamaCompatInferenceAdapter(config)
+
+    inference_adapter.__provider_spec__ = MagicMock()
+    inference_adapter.__provider_spec__.provider_data_validator = (
+        "llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator"
+    )
+
+    for api_key in ["test1", "test2"]:
+        with request_provider_data_context({"x-llamastack-provider-data": json.dumps({"llama_api_key": api_key})}):
+            assert inference_adapter.client.api_key == api_key