From 24c6b01d16eba34159a1e6388ce25b902a6aba9d Mon Sep 17 00:00:00 2001
From: Akram Ben Aissi
Date: Wed, 8 Oct 2025 20:40:55 +0200
Subject: [PATCH] Review, reverse the conditional on api_token_presence

---
 llama_stack/providers/remote/inference/vllm/vllm.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index d4b309265..e0ac88fc4 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -79,18 +79,14 @@ class VLLMInferenceAdapter(OpenAIMixin):
 
     async def check_model_availability(self, model: str) -> bool:
         """
-        Check model availability only when api_token is configured.
         Skip the check when running without authentication.
         """
-        if self.config.api_token:
-            # If we have a token, perform the normal availability check
+        if not self.config.api_token:
             try:
                 return model in [m.id async for m in self.client.models.list()]
             except Exception as e:
-                # If listing models fails, log the error but allow the model
                 log.warning(f"Failed to check model availability: {e}")
-                return True
-        # Without a token, skip the check to avoid OAuth redirects
+                raise ValueError(f"Failed to check model availability: {e}") from e
         return True
 
     async def openai_chat_completion(
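
For readers who want the end state rather than the hunk, here is a minimal, self-contained sketch of the behavior after this patch. The VLLMInferenceAdapterSketch class and the stub config, client, and model types are hypothetical stand-ins, not llama_stack code; only the reversed conditional and the log-then-raise error handling mirror the diff above.

import asyncio
import logging
from dataclasses import dataclass

log = logging.getLogger(__name__)


@dataclass
class _StubModel:
    id: str


class _StubModels:
    async def list(self):
        # Stand-in for the OpenAI-compatible /v1/models listing.
        yield _StubModel(id="dummy-model")


class _StubClient:
    models = _StubModels()


@dataclass
class _StubConfig:
    api_token: str | None = None


class VLLMInferenceAdapterSketch:
    def __init__(self, config: _StubConfig, client: _StubClient) -> None:
        self.config = config
        self.client = client

    async def check_model_availability(self, model: str) -> bool:
        if not self.config.api_token:
            # No token configured: query the model list directly.
            try:
                return model in [m.id async for m in self.client.models.list()]
            except Exception as e:
                log.warning(f"Failed to check model availability: {e}")
                raise ValueError(f"Failed to check model availability: {e}") from e
        # A token is configured: skip the listing and assume the model is available.
        return True


async def _demo() -> None:
    no_token = VLLMInferenceAdapterSketch(_StubConfig(api_token=None), _StubClient())
    print(await no_token.check_model_availability("dummy-model"))      # True (found in list)

    with_token = VLLMInferenceAdapterSketch(_StubConfig(api_token="secret"), _StubClient())
    print(await with_token.check_model_availability("unknown-model"))  # True (check skipped)


if __name__ == "__main__":
    asyncio.run(_demo())

Note that after this change a failed model listing surfaces as a ValueError instead of silently returning True, while a configured api_token now bypasses the listing entirely.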