fix: Don't cache clients for passthrough auth providers

Some of our inference providers support passthrough authentication via
`x-llamastack-provider-data` header values. This change stops the
providers that support passthrough auth from caching their clients for
the backend providers (mostly OpenAI client instances), so that a client
connecting to Llama Stack has to provide those auth values on each and
every request.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-07-11 10:11:31 -04:00
parent d880c2df0e
commit fa9e2dd543
4 changed files with 103 additions and 45 deletions

View file

@ -38,24 +38,18 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
provider_data_api_key_field="groq_api_key",
)
self.config = config
self._openai_client = None
async def initialize(self):
    """Initialize the adapter by delegating to the parent class.

    No adapter-specific setup happens here; the call is forwarded
    unchanged to ``super().initialize()``.
    """
    await super().initialize()
async def shutdown(self):
    """Shut down the adapter and release any client held on the instance.

    The parent class is shut down first. If ``self._openai_client`` holds
    a client, it is closed and the attribute is reset to ``None``.
    """
    await super().shutdown()

    # Nothing to release when no client is stored on the instance.
    if not self._openai_client:
        return

    await self._openai_client.close()
    self._openai_client = None
def _get_openai_client(self) -> AsyncOpenAI:
    """Return a newly constructed ``AsyncOpenAI`` client for this call.

    The client targets ``{self.config.url}/openai/v1`` and authenticates
    with whatever ``self.get_api_key()`` returns at call time.

    Returns:
        A new ``AsyncOpenAI`` instance. No reference to it is kept on the
        adapter, so each call yields an independent client.
    """
    # Intentionally not cached: a client stored on the instance would pin
    # the API key it was first built with. Building one per call lets the
    # key be resolved again on every request.
    # NOTE(review): get_api_key() presumably picks up the per-request
    # `groq_api_key` from provider data -- confirm against the mixin.
    return AsyncOpenAI(
        base_url=f"{self.config.url}/openai/v1",
        api_key=self.get_api_key(),
    )
async def openai_chat_completion(
self,