mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-02 20:40:36 +00:00
to add health status check for remote vllm
This commit is contained in:
parent
b21050935e
commit
6d1cf140ba
2 changed files with 71 additions and 2 deletions
|
@ -11,7 +11,7 @@ import threading
|
|||
import time
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
@ -44,6 +44,7 @@ from llama_stack.apis.inference import (
|
|||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.models.llama.datatypes import StopReason, ToolCall
|
||||
from llama_stack.providers.datatypes import HealthStatus
|
||||
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
|
||||
from llama_stack.providers.remote.inference.vllm.vllm import (
|
||||
VLLMInferenceAdapter,
|
||||
|
@ -639,3 +640,42 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args():
|
|||
assert chunks[-2].event.delta.type == "tool_call"
|
||||
assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name
|
||||
assert chunks[-2].event.delta.tool_call.arguments == {}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health_status_success(vllm_inference_adapter):
|
||||
"""
|
||||
Test the health method of VLLM InferenceAdapter when the connection is successful.
|
||||
|
||||
This test verifies that the health method returns a HealthResponse with status OK, only
|
||||
when the connection to the vLLM server is successful.
|
||||
"""
|
||||
# Mock the requests.get method to return a successful response
|
||||
with patch('requests.get') as mock_get:
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_get.return_value = mock_response
|
||||
# Call the health method
|
||||
health_response = await vllm_inference_adapter.health()
|
||||
# Verify the response
|
||||
assert health_response["status"] == HealthStatus.OK
|
||||
mock_get.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health_status_failure(vllm_inference_adapter):
|
||||
"""
|
||||
Test the health method of VLLM InferenceAdapter when the connection fails.
|
||||
|
||||
This test verifies that the health method returns a HealthResponse with status ERROR
|
||||
and an appropriate error message when the connection to the vLLM server fails.
|
||||
"""
|
||||
# Mock the requests.get method to raise an exception
|
||||
with patch('requests.get') as mock_get:
|
||||
mock_get.side_effect = Exception("Connection failed")
|
||||
health_response = await vllm_inference_adapter.health()
|
||||
# Verify the response
|
||||
assert health_response["status"] == HealthStatus.ERROR
|
||||
assert "Health check failed: Connection failed" in health_response["message"]
|
||||
# Verify that requests.get was called
|
||||
mock_get.assert_called_once()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue