From f20eb57befdc037d9151dfa5919329e4c98f830b Mon Sep 17 00:00:00 2001
From: Akram Ben Aissi
Date: Wed, 8 Oct 2025 20:06:19 +0200
Subject: [PATCH] fix: allow skipping model availability check for vLLM

---
 .../providers/remote/inference/vllm/vllm.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 310eaf7b6..d4b309265 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -77,6 +77,22 @@ class VLLMInferenceAdapter(OpenAIMixin):
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check model availability only when api_token is configured.
+        Skip the check when running without authentication.
+        """
+        if self.config.api_token:
+            # If we have a token, perform the normal availability check
+            try:
+                return model in [m.id async for m in self.client.models.list()]
+            except Exception as e:
+                # If listing models fails, log the error but allow the model
+                log.warning(f"Failed to check model availability: {e}")
+                return True
+        # Without a token, skip the check to avoid OAuth redirects
+        return True
+
     async def openai_chat_completion(
         self,
         model: str,
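
Note (not part of the patch): a minimal sketch of how the new behavior could be exercised. It assumes check_model_availability only reads self.config.api_token and iterates self.client.models.list(); the stand-in "self", the fake client, and the model IDs below are hypothetical, not real llama-stack or vLLM objects. The method is called unbound with a fake self so no real HTTP client is constructed.

import asyncio
from types import SimpleNamespace

from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter


def _fake_models(ids):
    # Mimics the async iteration the adapter performs over client.models.list():
    # .list() returns a fresh async generator of objects exposing an .id attribute.
    def _list():
        async def _gen():
            for model_id in ids:
                yield SimpleNamespace(id=model_id)
        return _gen()
    return SimpleNamespace(list=_list)


def _fake_self(api_token, model_ids):
    # Stand-in adapter carrying only the attributes the method reads.
    return SimpleNamespace(
        config=SimpleNamespace(api_token=api_token),
        client=SimpleNamespace(models=_fake_models(model_ids)),
    )


async def main():
    check = VLLMInferenceAdapter.check_model_availability

    # No token: the check is skipped and any model is accepted.
    assert await check(_fake_self(None, []), "some-model") is True

    # Token set: the adapter consults the model listing.
    assert await check(_fake_self("secret", ["some-model"]), "some-model") is True
    assert await check(_fake_self("secret", ["some-model"]), "other-model") is False


asyncio.run(main())

This mirrors the two branches in the patch: without an api_token the adapter returns True immediately (avoiding the OAuth redirect), and with a token it falls back to the normal /v1/models lookup.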