Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-12 20:12:33 +00:00
fix: allow skipping model availability check for vLLM
parent 5d711d4bcb
commit f20eb57bef
1 changed file with 16 additions and 0 deletions
@@ -77,6 +77,22 @@ class VLLMInferenceAdapter(OpenAIMixin):
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check model availability only when api_token is configured.
+        Skip the check when running without authentication.
+        """
+        if self.config.api_token:
+            # If we have a token, perform the normal availability check
+            try:
+                return model in [m.id async for m in self.client.models.list()]
+            except Exception as e:
+                # If listing models fails, log the error but allow the model
+                log.warning(f"Failed to check model availability: {e}")
+                return True
+        # Without a token, skip the check to avoid OAuth redirects
+        return True
+
     async def openai_chat_completion(
         self,
         model: str,
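For context, below is a minimal standalone sketch of the token-gated behavior this patch introduces. It is not the adapter's real class: _AvailabilitySketch, _FakeConfig, and list_model_ids are hypothetical stand-ins for VLLMInferenceAdapter, its config, and the self.client.models.list() call, used only to illustrate how the check is skipped when no api_token is set and falls back to True when listing models fails.

import asyncio
import logging
from dataclasses import dataclass

log = logging.getLogger(__name__)


@dataclass
class _FakeConfig:
    # Hypothetical stand-in for the adapter config; only the field used here.
    api_token: str | None = None


class _AvailabilitySketch:
    """Standalone mock mirroring the token-gated availability check."""

    def __init__(self, config: _FakeConfig, known_models: list[str]):
        self.config = config
        self._known_models = known_models

    async def list_model_ids(self) -> list[str]:
        # Stand-in for the `self.client.models.list()` call; a real request can
        # fail (e.g. be redirected to an OAuth login) when the endpoint needs auth.
        if not self.config.api_token:
            raise RuntimeError("unauthenticated request redirected to OAuth login")
        return self._known_models

    async def check_model_availability(self, model: str) -> bool:
        if self.config.api_token:
            # With a token, consult the model list; on failure, warn and allow.
            try:
                return model in await self.list_model_ids()
            except Exception as e:
                log.warning(f"Failed to check model availability: {e}")
                return True
        # Without a token, skip the check entirely.
        return True


if __name__ == "__main__":
    # No token configured: the check is skipped and the model is accepted.
    adapter = _AvailabilitySketch(_FakeConfig(api_token=None), ["meta-llama/Llama-3.1-8B"])
    print(asyncio.run(adapter.check_model_availability("meta-llama/Llama-3.1-8B")))  # True

    # With a token, the model list is actually consulted.
    adapter = _AvailabilitySketch(_FakeConfig(api_token="secret"), ["meta-llama/Llama-3.1-8B"])
    print(asyncio.run(adapter.check_model_availability("other-model")))  # False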