forked from phoenix-oss/llama-stack-mirror
The `/v1/providers` now reports the health status of each provider when implemented. ``` curl -L http://127.0.0.1:8321/v1/providers|jq % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 4072 100 4072 0 0 246k 0 --:--:-- --:--:-- --:--:-- 248k { "data": [ { "api": "inference", "provider_id": "ollama", "provider_type": "remote::ollama", "config": { "url": "http://localhost:11434" }, "health": { "status": "OK" } }, { "api": "vector_io", "provider_id": "faiss", "provider_type": "inline::faiss", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/faiss_store.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "safety", "provider_id": "llama-guard", "provider_type": "inline::llama-guard", "config": { "excluded_categories": [] }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "agents", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "persistence_store": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/agents_store.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "telemetry", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "service_name": "llama-stack", "sinks": "console,sqlite", "sqlite_db_path": "/Users/leseb/.llama/distributions/ollama/trace_store.db" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "eval", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/meta_reference_eval.db" } }, "health": { "status": "Not Implemented", "message": 
"Provider does not implement health check" } }, { "api": "datasetio", "provider_id": "huggingface", "provider_type": "remote::huggingface", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/huggingface_datasetio.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "datasetio", "provider_id": "localfs", "provider_type": "inline::localfs", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/localfs_datasetio.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "basic", "provider_type": "inline::basic", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "llm-as-judge", "provider_type": "inline::llm-as-judge", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "braintrust", "provider_type": "inline::braintrust", "config": { "openai_api_key": "********" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "brave-search", "provider_type": "remote::brave-search", "config": { "api_key": "********", "max_results": 3 }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "tavily-search", "provider_type": "remote::tavily-search", "config": { "api_key": "********", "max_results": 3 }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "code-interpreter", "provider_type": "inline::code-interpreter", "config": {}, "health": { "status": "Not 
Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "rag-runtime", "provider_type": "inline::rag-runtime", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "model-context-protocol", "provider_type": "remote::model-context-protocol", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "wolfram-alpha", "provider_type": "remote::wolfram-alpha", "config": { "api_key": "********" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } } ] } ``` The health status is also reported when querying a single provider: ``` curl -L http://127.0.0.1:8321/v1/providers/ollama {"api":"inference","provider_id":"ollama","provider_type":"remote::ollama","config":{"url":"http://localhost:11434"},"health":{"status":"OK"}} ``` Signed-off-by: Sébastien Han <seb@redhat.com>
128 lines
4.5 KiB
Python
128 lines
4.5 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import asyncio
|
|
from typing import Any, Dict
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
|
|
from llama_stack.log import get_logger
|
|
from llama_stack.providers.datatypes import HealthResponse, HealthStatus
|
|
|
|
from .datatypes import StackRunConfig
|
|
from .utils.config import redact_sensitive_fields
|
|
|
|
logger = get_logger(name=__name__, category="core")
|
|
|
|
|
|
class ProviderImplConfig(BaseModel):
    """Configuration model wrapping the stack run configuration."""

    # The stack run configuration carried by this config object.
    run_config: StackRunConfig
|
|
|
|
|
|
async def get_provider_impl(config, deps):
    """Create a ``ProviderImpl`` from ``config`` and ``deps`` and initialize it.

    Both arguments are forwarded unchanged to the ``ProviderImpl`` constructor.

    Returns:
        The ``ProviderImpl`` instance, after its ``initialize()`` coroutine
        has completed.
    """
    provider_impl = ProviderImpl(config, deps)
    await provider_impl.initialize()
    return provider_impl
|
|
|
|
|
|
class ProviderImpl(Providers):
    """Implementation of the ``Providers`` API.

    Lists the providers declared in the run configuration (with sensitive
    fields redacted) and attaches to each the health information collected
    from the implementations passed in as ``deps``.
    """

    def __init__(self, config, deps):
        """Store the configuration and the dependency implementations.

        Args:
            config: Object exposing a ``run_config`` attribute; it is read by
                ``list_providers``.
            deps: Mapping whose values are the implementations polled by
                ``get_providers_health``.
        """
        self.config = config
        self.deps = deps

    async def initialize(self) -> None:
        """Do nothing; this implementation needs no setup."""
        pass

    async def shutdown(self) -> None:
        """Log the shutdown at debug level; nothing else is released here."""
        logger.debug("ProviderImpl.shutdown")

    @staticmethod
    def _health_not_implemented() -> HealthResponse:
        """Build the response reported for a provider without a health check.

        A fresh object is created on every call so that callers never share
        one response instance.
        """
        return HealthResponse(
            status=HealthStatus.NOT_IMPLEMENTED,
            message="Provider does not implement health check",
        )

    async def list_providers(self) -> ListProvidersResponse:
        """List every configured provider together with its health.

        The run configuration is dumped, passed through
        ``redact_sensitive_fields`` and rebuilt as a ``StackRunConfig`` before
        any provider configuration is copied into the response.

        Returns:
            ListProvidersResponse: One ``ProviderInfo`` per configured
            provider. A provider with no entry in the collected health data is
            reported with the ``NOT_IMPLEMENTED`` status.
        """
        run_config = self.config.run_config
        safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump()))
        providers_health = await self.get_providers_health()

        ret = []
        for api, providers in safe_config.providers.items():
            api_health = providers_health.get(api, {})
            for p in providers:
                ret.append(
                    ProviderInfo(
                        api=api,
                        provider_id=p.provider_id,
                        provider_type=p.provider_type,
                        config=p.config,
                        health=api_health.get(p.provider_id, self._health_not_implemented()),
                    )
                )

        return ListProvidersResponse(data=ret)

    async def inspect_provider(self, provider_id: str) -> ProviderInfo:
        """Return the first listed provider whose ID equals ``provider_id``.

        Args:
            provider_id: Identifier of the provider to look up.

        Returns:
            ProviderInfo: The matching entry from ``list_providers``.

        Raises:
            ValueError: If no listed provider has this ID.
        """
        all_providers = await self.list_providers()
        for p in all_providers.data:
            if p.provider_id == provider_id:
                return p

        raise ValueError(f"Provider {provider_id} not found")

    async def get_providers_health(self) -> Dict[str, Dict[str, HealthResponse]]:
        """Get health status for all providers.

        Every implementation in ``self.deps`` is checked concurrently. A check
        that does not finish within the timeout, or that raises, is recorded
        with the ``ERROR`` status instead of propagating the failure.

        Returns:
            Dict[str, Dict[str, HealthResponse]]: A dictionary mapping API names to provider health statuses.
                Each API maps to a dictionary of provider IDs to their health responses.
        """
        providers_health: Dict[str, Dict[str, HealthResponse]] = {}
        # Upper bound, in seconds, for a single implementation's health() call.
        timeout = 1.0

        async def check_provider_health(impl: Any) -> tuple[str, HealthResponse] | None:
            # Skip special implementations (inspect/providers) that don't have provider specs
            if not hasattr(impl, "__provider_spec__"):
                return None
            api_name = impl.__provider_spec__.api.name
            if not hasattr(impl, "health"):
                return api_name, self._health_not_implemented()

            try:
                health = await asyncio.wait_for(impl.health(), timeout=timeout)
                return api_name, health
            except asyncio.TimeoutError:
                return (
                    api_name,
                    HealthResponse(
                        status=HealthStatus.ERROR, message=f"Health check timed out after {timeout} seconds"
                    ),
                )
            except Exception as e:
                # Best effort: one failing health check must not break the listing.
                return (
                    api_name,
                    HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"),
                )

        # Create one coroutine per implementation
        tasks = [check_provider_health(impl) for impl in self.deps.values()]

        # Wait for all health checks to complete
        results = await asyncio.gather(*tasks)

        # Organize results by API name
        for result in results:
            if result is None:  # Skip special implementations
                continue
            api_name, health_response = result
            # NOTE(review): the value is stored as returned, while the annotation
            # promises a mapping of provider ID -> HealthResponse. This presumably
            # relies on impl.health() returning such a mapping; the fallback
            # responses built above are single responses, so list_providers may
            # not find them by provider ID. TODO confirm against HealthResponse
            # and the health() implementations.
            providers_health[api_name] = health_response

        return providers_health
|