Add health status check for remote vLLM

This commit is contained in:
Sumit Jaiswal 2025-05-29 02:10:13 +05:30
parent b21050935e
commit 6d1cf140ba
No known key found for this signature in database
GPG key ID: A4604B39D64D6AEC
2 changed files with 71 additions and 2 deletions

View file

@ -9,6 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
from typing import Any
import httpx
import requests
from openai import AsyncOpenAI
from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OpenAIChatCompletionChunk,
@ -56,7 +57,11 @@ from llama_stack.apis.inference.inference import (
from llama_stack.apis.models import Model, ModelType
from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.datatypes import (
HealthResponse,
HealthStatus,
ModelsProtocolPrivate,
)
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_hf_repo_model_entry,
@ -298,6 +303,30 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
async def unregister_model(self, model_id: str) -> None:
    """Accept a model unregistration request without doing any work.

    Args:
        model_id: Identifier of the model being unregistered; not used here.
    """
    return None
async def health(self) -> HealthResponse:
    """
    Check whether the remote vLLM server is reachable and answering requests.

    The probe lists the server's models through the adapter's async
    OpenAI-compatible client. Compared with a hand-built blocking HTTP call,
    this does not stall the event loop, reuses the client's configured base
    URL and API key, and treats HTTP error responses as failures.

    Returns:
        HealthResponse: ``status`` is ``HealthStatus.OK`` when the models
        request succeeds. Otherwise ``status`` is ``HealthStatus.ERROR`` and
        ``message`` describes the failure. Failures are reported in the
        returned value rather than raised.
    """
    try:
        # Reuse the existing client when there is one; otherwise build one
        # just for this probe.
        client = self._create_client() if self.client is None else self.client
        # Awaiting the call fetches the first page of models. The SDK raises
        # on connection problems and on non-2xx responses, so reaching the
        # next statement means the server answered successfully. The 10 s
        # per-request timeout keeps the probe from hanging on a dead server.
        await client.models.list(timeout=10)
    except Exception as ex:
        return HealthResponse(
            status=HealthStatus.ERROR,
            message=f"Health check failed: {str(ex)}",
        )
    return HealthResponse(status=HealthStatus.OK)
async def _get_model(self, model_id: str) -> Model:
if not self.model_store:
raise ValueError("Model store not set")