mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-29 03:14:19 +00:00
to add health status check for remote vllm
This commit is contained in:
parent
b21050935e
commit
6d1cf140ba
2 changed files with 71 additions and 2 deletions
|
@ -9,6 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
|
|||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
from openai import AsyncOpenAI
|
||||
from openai.types.chat.chat_completion_chunk import (
|
||||
ChatCompletionChunk as OpenAIChatCompletionChunk,
|
||||
|
@ -56,7 +57,11 @@ from llama_stack.apis.inference.inference import (
|
|||
from llama_stack.apis.models import Model, ModelType
|
||||
from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall
|
||||
from llama_stack.models.llama.sku_list import all_registered_models
|
||||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.datatypes import (
|
||||
HealthResponse,
|
||||
HealthStatus,
|
||||
ModelsProtocolPrivate,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_entry,
|
||||
|
@ -298,6 +303,30 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
async def unregister_model(self, model_id: str) -> None:
    """Accept an unregistration request for ``model_id`` and do nothing.

    The adapter performs no work here; the coroutine simply completes.
    """
    return None
|
||||
|
||||
async def health(self) -> HealthResponse:
    """
    Check that the remote vLLM server is reachable and answering API calls.

    The probe lists the server's models through the adapter's async
    OpenAI-compatible client. Compared with issuing a synchronous HTTP
    request by hand, this:

    * does not block the event loop while waiting on the network,
    * reuses the client's own base URL and credentials instead of
      rebuilding the URL and Authorization header, and
    * fails on non-2xx replies (the client raises on them), so an
      unauthorized or broken server is not reported as healthy.

    Returns:
        HealthResponse: A dictionary containing the health status.
        ``status`` is ``HealthStatus.OK`` when the model listing succeeds;
        otherwise it is ``HealthStatus.ERROR`` and ``message`` describes
        the failure. Ordinary exceptions are converted into an ERROR
        response rather than propagated.
    """
    try:
        # Reuse the adapter's client when it exists; otherwise build one on demand.
        client = self._create_client() if self.client is None else self.client
        # Keep the probe bounded to 10 seconds per request and disable the
        # client's automatic retries so an unreachable server is reported
        # promptly instead of after several back-off attempts.
        # NOTE(review): assumes the client is an openai.AsyncOpenAI instance
        # (the module imports AsyncOpenAI) -- confirm against _create_client().
        probe = client.with_options(timeout=10.0, max_retries=0)
        await probe.models.list()
        return HealthResponse(status=HealthStatus.OK)
    except Exception as ex:
        # Deliberately broad: a health probe reports failures, it does not raise them.
        return HealthResponse(
            status=HealthStatus.ERROR,
            message=f"Health check failed: {str(ex)}",
        )
|
||||
|
||||
async def _get_model(self, model_id: str) -> Model:
|
||||
if not self.model_store:
|
||||
raise ValueError("Model store not set")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue