fix: Don't cache clients for passthrough auth providers

Some of our inference providers support passthrough authentication via values in the `x-llamastack-provider-data` header. This change stops the providers that support passthrough auth from caching their clients for the backend providers (mostly OpenAI client instances), so that a client connecting to Llama Stack must supply those auth values on every request. Signed-off-by: Ben Browning <bbrownin@redhat.com>
2025-12-23 21:23:59 +00:00 · 2025-07-11 10:11:31 -04:00 · 2025-07-11 10:11:31 -04:00 · fa9e2dd543
commit fa9e2dd543
parent d880c2df0e
4 changed files with 103 additions and 45 deletions
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -38,24 +38,18 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
            provider_data_api_key_field="groq_api_key",
        )
        self.config = config
-        self._openai_client = None

    async def initialize(self):
        await super().initialize()

    async def shutdown(self):
        await super().shutdown()
-        if self._openai_client:
-            await self._openai_client.close()
-            self._openai_client = None

    def _get_openai_client(self) -> AsyncOpenAI:
-        if not self._openai_client:
-            self._openai_client = AsyncOpenAI(
-                base_url=f"{self.config.url}/openai/v1",
-                api_key=self.config.api_key,
-            )
-        return self._openai_client
+        return AsyncOpenAI(
+            base_url=f"{self.config.url}/openai/v1",
+            api_key=self.get_api_key(),
+        )

    async def openai_chat_completion(
        self,