diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 21e4c0e1d..9e084ea30 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -9,7 +9,6 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any
 
 import httpx
-import requests
 from openai import AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
@@ -314,14 +313,9 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         try:
             client = self._create_client() if self.client is None else self.client
             client.models.list()  # Ensure the client is initialized
-            return HealthResponse(
-                status=HealthStatus.OK
-            )
+            return HealthResponse(status=HealthStatus.OK)
         except Exception as ex:
-            return HealthResponse(
-                status=HealthStatus.ERROR,
-                message=f"Health check failed: {str(ex)}"
-            )
+            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(ex)}")
 
     async def _get_model(self, model_id: str) -> Model:
         if not self.model_store:
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index 375cfe82d..1fc68a631 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -653,7 +653,7 @@ async def test_health_status_success(vllm_inference_adapter):
     # Mock the client.models.list method to return successfully
     # Set vllm_inference_adapter.client to None to ensure _create_client is called
     vllm_inference_adapter.client = None
-    with patch.object(vllm_inference_adapter, '_create_client') as mock_create_client:
+    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
         # Create mock client and models
         mock_client = MagicMock()
         mock_models = MagicMock()
@@ -678,7 +678,7 @@ async def test_health_status_failure(vllm_inference_adapter):
     """
     vllm_inference_adapter.client = None
     # Mock the client.models.list method to raise an exception
-    with patch.object(vllm_inference_adapter, '_create_client') as mock_create_client:
+    with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
         # Create mock client and models
         mock_client = MagicMock()
         mock_models = MagicMock()