diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index b4079c39f..15f807846 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -504,7 +504,7 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
         try:
-            res = await self.client.models.list()
+            res = self.client.models.list()
         except APIConnectionError as e:
             raise ValueError(
                 f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 9545e0cf6..4dc2e0c16 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -62,7 +62,7 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
 
 @pytest.fixture(scope="module")
 def mock_openai_models_list():
-    with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:
+    with patch("openai.resources.models.AsyncModels.list") as mock_list:
         yield mock_list
 
 
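Context for the change: with the async OpenAI client, `models.list()` returns an async paginator that is consumed with `async for` rather than awaited, which is why the `await` can be dropped and why the test fixture no longer needs `new_callable=AsyncMock` (the patched call itself is no longer awaited). Below is a minimal sketch of that consumption pattern, assuming a local vLLM OpenAI-compatible endpoint; the `check_model` helper, URL, and model id are illustrative and not part of this diff.

```python
# Minimal sketch (not part of this PR): iterating the model listing without
# awaiting the .list() call itself. check_model, the base URL, and the model
# id below are hypothetical examples.
import asyncio

from openai import AsyncOpenAI


async def check_model(base_url: str, api_key: str, model_id: str) -> bool:
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    # AsyncModels.list() returns an async paginator; it supports `async for`
    # directly, so no `await` is needed on the call.
    res = client.models.list()
    async for m in res:
        if m.id == model_id:
            return True
    return False


if __name__ == "__main__":
    # Assumes a vLLM server exposing the OpenAI-compatible API locally.
    found = asyncio.run(
        check_model("http://localhost:8000/v1", "not-needed", "meta-llama/Llama-3.1-8B-Instruct")
    )
    print(found)
```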