From 9c42598aee14481f4954ad71cce3a57888487a4e Mon Sep 17 00:00:00 2001
From: Sumit Jaiswal
Date: Fri, 30 May 2025 16:14:31 +0530
Subject: [PATCH] fix review around /models api call

---
 llama_stack/providers/remote/inference/vllm/vllm.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 49a0b96bc..21e4c0e1d 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -312,12 +312,8 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             HealthResponse: A dictionary containing the health status.
         """
         try:
-            headers = {}
             client = self._create_client() if self.client is None else self.client
-            if client.api_key:
-                headers["Authorization"] = f"Bearer {client.api_key}"
-            models_url = f"{client.base_url}models"
-            requests.get(models_url, headers=headers, timeout=10)
+            client.models.list()  # Ensure the client is initialized
             return HealthResponse(
                 status=HealthStatus.OK
             )
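
For context, a minimal sketch of how the health-check method reads after this patch is applied. Only the try-body lines are confirmed by the hunk above; the method name, async signature, docstring opening, and the except branch are assumptions reconstructed from the visible context, not part of the diff itself.

    async def health(self) -> HealthResponse:
        """
        Performs a health check by verifying connectivity to the remote vLLM server.

        Returns:
            HealthResponse: A dictionary containing the health status.
        """
        try:
            # Reuse the existing client if one is set, otherwise create one.
            client = self._create_client() if self.client is None else self.client
            client.models.list()  # Ensure the client is initialized
            return HealthResponse(status=HealthStatus.OK)
        except Exception as e:
            # Assumed failure branch: the exact shape of the error response
            # is not visible in the hunk.
            return HealthResponse(status=HealthStatus.ERROR, message=str(e))

The design point of the change: calling client.models.list() exercises the same /models endpoint as the removed requests.get, but delegates the base URL, Authorization header, and timeout handling to the already-configured OpenAI client instead of hand-building them, which also drops the adapter's direct dependency on requests for this path.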