diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index b4079c39f..15f807846 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -504,7 +504,7 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
         try:
-            res = await self.client.models.list()
+            res = self.client.models.list()
         except APIConnectionError as e:
             raise ValueError(
                 f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 9545e0cf6..4dc2e0c16 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -62,7 +62,7 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
 
 @pytest.fixture(scope="module")
 def mock_openai_models_list():
-    with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:
+    with patch("openai.resources.models.AsyncModels.list") as mock_list:
         yield mock_list
 
 
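Context for the change: with the async OpenAI client, `models.list()` returns an async paginator that is consumed with `async for` rather than awaited, which is why the `await` can be dropped and why the test fixture no longer needs `new_callable=AsyncMock` (the patched call itself is no longer awaited). Below is a minimal sketch of that consumption pattern, assuming a local vLLM OpenAI-compatible endpoint; the `check_model` helper, URL, and model id are illustrative and not part of this diff.

```python
# Minimal sketch (not part of this PR): iterating the model listing without
# awaiting the .list() call itself. check_model, the base URL, and the model
# id below are hypothetical examples.
import asyncio

from openai import AsyncOpenAI


async def check_model(base_url: str, api_key: str, model_id: str) -> bool:
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    # AsyncModels.list() returns an async paginator; it supports `async for`
    # directly, so no `await` is needed on the call.
    res = client.models.list()
    async for m in res:
        if m.id == model_id:
            return True
    return False


if __name__ == "__main__":
    # Assumes a vLLM server exposing the OpenAI-compatible API locally.
    found = asyncio.run(
        check_model("http://localhost:8000/v1", "not-needed", "meta-llama/Llama-3.1-8B-Instruct")
    )
    print(found)
```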