feat: add health to all providers through providers endpoint (#1418)

mirror of https://github.com/meta-llama/llama-stack.git synced 2025-12-20 02:08:48 +00:00

The `/v1/providers` endpoint now reports the health status of each
provider. Providers that do not implement a health check are reported
with the status "Not Implemented".

```
curl -L http://127.0.0.1:8321/v1/providers|jq
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4072  100  4072    0     0   246k      0 --:--:-- --:--:-- --:--:--  248k
{
  "data": [
    {
      "api": "inference",
      "provider_id": "ollama",
      "provider_type": "remote::ollama",
      "config": {
        "url": "http://localhost:11434"
      },
      "health": {
        "status": "OK"
      }
    },
    {
      "api": "vector_io",
      "provider_id": "faiss",
      "provider_type": "inline::faiss",
      "config": {
        "kvstore": {
          "type": "sqlite",
          "namespace": null,
          "db_path": "/Users/leseb/.llama/distributions/ollama/faiss_store.db"
        }
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "safety",
      "provider_id": "llama-guard",
      "provider_type": "inline::llama-guard",
      "config": {
        "excluded_categories": []
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "agents",
      "provider_id": "meta-reference",
      "provider_type": "inline::meta-reference",
      "config": {
        "persistence_store": {
          "type": "sqlite",
          "namespace": null,
          "db_path": "/Users/leseb/.llama/distributions/ollama/agents_store.db"
        }
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "telemetry",
      "provider_id": "meta-reference",
      "provider_type": "inline::meta-reference",
      "config": {
        "service_name": "llama-stack",
        "sinks": "console,sqlite",
        "sqlite_db_path": "/Users/leseb/.llama/distributions/ollama/trace_store.db"
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "eval",
      "provider_id": "meta-reference",
      "provider_type": "inline::meta-reference",
      "config": {
        "kvstore": {
          "type": "sqlite",
          "namespace": null,
          "db_path": "/Users/leseb/.llama/distributions/ollama/meta_reference_eval.db"
        }
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "datasetio",
      "provider_id": "huggingface",
      "provider_type": "remote::huggingface",
      "config": {
        "kvstore": {
          "type": "sqlite",
          "namespace": null,
          "db_path": "/Users/leseb/.llama/distributions/ollama/huggingface_datasetio.db"
        }
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "datasetio",
      "provider_id": "localfs",
      "provider_type": "inline::localfs",
      "config": {
        "kvstore": {
          "type": "sqlite",
          "namespace": null,
          "db_path": "/Users/leseb/.llama/distributions/ollama/localfs_datasetio.db"
        }
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "scoring",
      "provider_id": "basic",
      "provider_type": "inline::basic",
      "config": {},
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "scoring",
      "provider_id": "llm-as-judge",
      "provider_type": "inline::llm-as-judge",
      "config": {},
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "scoring",
      "provider_id": "braintrust",
      "provider_type": "inline::braintrust",
      "config": {
        "openai_api_key": "********"
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "brave-search",
      "provider_type": "remote::brave-search",
      "config": {
        "api_key": "********",
        "max_results": 3
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "tavily-search",
      "provider_type": "remote::tavily-search",
      "config": {
        "api_key": "********",
        "max_results": 3
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "code-interpreter",
      "provider_type": "inline::code-interpreter",
      "config": {},
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "rag-runtime",
      "provider_type": "inline::rag-runtime",
      "config": {},
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "model-context-protocol",
      "provider_type": "remote::model-context-protocol",
      "config": {},
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    },
    {
      "api": "tool_runtime",
      "provider_id": "wolfram-alpha",
      "provider_type": "remote::wolfram-alpha",
      "config": {
        "api_key": "********"
      },
      "health": {
        "status": "Not Implemented",
        "message": "Provider does not implement health check"
      }
    }
  ]
}
```

The health status is also reported when querying a single provider:

```
curl -L http://127.0.0.1:8321/v1/providers/ollama
{"api":"inference","provider_id":"ollama","provider_type":"remote::ollama","config":{"url":"http://localhost:11434"},"health":{"status":"OK"}}
```

Signed-off-by: Sébastien Han <seb@redhat.com>

This commit is contained in:

Sébastien Han

2025-04-14 11:59:36 +02:00

• committed by

GitHub

parent ff14773fa7

commit 69554158fa

No known key found for this signature in database

GPG key ID: B5690EEEBB952194

15 changed files with 244 additions and 76 deletions

									
										26

llama_stack/distribution/routers/routers.py
									
										View file
										
				@ -4,6 +4,7 @@

				# This source code is licensed under the terms described in the LICENSE file in

				# the root directory of this source tree.

				import asyncio

				import time

				from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union

				@ -60,7 +61,7 @@ from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO

				from llama_stack.log import get_logger

				from llama_stack.models.llama.llama3.chat_format import ChatFormat

				from llama_stack.models.llama.llama3.tokenizer import Tokenizer

				from llama_stack.providers.datatypes import RoutingTable

				from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable

				from llama_stack.providers.utils.telemetry.tracing import get_current_span

				logger = get_logger(name=__name__, category="core")

				@ -580,6 +581,29 @@ class InferenceRouter(Inference):

				        provider = self.routing_table.get_provider_impl(model_obj.identifier)

				        return await provider.openai_chat_completion(**params)

				    async def health(self) -> Dict[str, HealthResponse]:

				        health_statuses = {}

				        timeout = 0.5

				        for provider_id, impl in self.routing_table.impls_by_provider_id.items():

				            try:

				                # check if the provider has a health method

				                if not hasattr(impl, "health"):

				                    continue

				                health = await asyncio.wait_for(impl.health(), timeout=timeout)

				                health_statuses[provider_id] = health

				            except asyncio.TimeoutError:

				                health_statuses[provider_id] = HealthResponse(

				                    status=HealthStatus.ERROR,

				                    message=f"Health check timed out after {timeout} seconds",

				                )

				            except NotImplementedError:

				                health_statuses[provider_id] = HealthResponse(status=HealthStatus.NOT_IMPLEMENTED)

				            except Exception as e:

				                health_statuses[provider_id] = HealthResponse(

				                    status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}"

				                )

				        return health_statuses

				class SafetyRouter(Safety):

				    def __init__(

Rows
Columns

feat: add health to all providers through providers endpoint (#1418)

26 llama_stack/distribution/routers/routers.py Unescape Escape View file

26

llama_stack/distribution/routers/routers.py

View file