diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 310eaf7b6..5974ca176 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -77,6 +77,20 @@ class VLLMInferenceAdapter(OpenAIMixin):
     def get_extra_client_params(self):
         return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
 
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Skip the availability check when an API token is set, as listing models may trigger an OAuth workflow.
+        """
+        if not self.config.api_token:
+            model_ids = []
+            async for m in self.client.models.list():
+                if m.id == model:  # Found exact match
+                    return True
+                model_ids.append(m.id)
+            raise ValueError(f"Model '{model}' not found. Available models: {model_ids}")
+        log.warning(f"Not checking model availability for {model}: listing models with an API token may trigger an OAuth workflow")
+        return True
+
     async def openai_chat_completion(
         self,
         model: str,
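
A minimal sketch of the control flow this hunk adds, outside the adapter class so it can run standalone: the check walks the `/models` listing only when no API token is configured, and otherwise trusts the caller. `FakeModelsAPI` is a hypothetical test double standing in for `self.client.models`, not part of llama-stack or the OpenAI client.

```python
# Sketch under assumptions: FakeModelsAPI mimics the async-iterable
# returned by client.models.list(); only the .id attribute is used.
import asyncio
import logging

log = logging.getLogger(__name__)


class FakeModelsAPI:
    """Hypothetical stand-in for client.models; yields objects with an .id field."""

    def __init__(self, ids):
        self._ids = ids

    async def list(self):
        for id_ in self._ids:
            yield type("Model", (), {"id": id_})()


async def check_model_availability(models_api, api_token, model):
    # Mirrors the diff: verify only when running without authentication.
    if not api_token:
        model_ids = []
        async for m in models_api.list():
            if m.id == model:  # Found exact match
                return True
            model_ids.append(m.id)
        raise ValueError(f"Model '{model}' not found. Available models: {model_ids}")
    log.warning(f"Not checking model availability for {model}: listing models with an API token may trigger an OAuth workflow")
    return True


async def main():
    api = FakeModelsAPI(["meta-llama/Llama-3.1-8B-Instruct"])
    # No token: the listing is consulted and the exact match is found.
    print(await check_model_availability(api, None, "meta-llama/Llama-3.1-8B-Instruct"))  # True
    # Token present: the check is skipped entirely and True is returned.
    print(await check_model_availability(api, "secret-token", "any-model"))  # True


asyncio.run(main())
```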