Add an introspection "Api.inspect" API

2025-12-03 01:48:05 +00:00 · 2024-10-02 15:13:24 -07:00 · 2024-10-02 15:13:24 -07:00 · 8d049000e3
commit 8d049000e3
parent 01d93be948
14 changed files with 619 additions and 174 deletions
--- a/llama_stack/distribution/server/endpoints.py
+++ b/llama_stack/distribution/server/endpoints.py
@ -11,12 +11,14 @@ from pydantic import BaseModel

 from llama_stack.apis.agents import Agents
 from llama_stack.apis.inference import Inference
+from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.memory import Memory
 from llama_stack.apis.memory_banks import MemoryBanks
 from llama_stack.apis.models import Models
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
+
 from llama_stack.providers.datatypes import Api


@ -38,6 +40,7 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]:
        Api.models: Models,
        Api.shields: Shields,
        Api.memory_banks: MemoryBanks,
+        Api.inspect: Inspect,
    }

    for api, protocol in protocols.items():
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -15,7 +15,6 @@ from collections.abc import (
    AsyncIterator as AsyncIteratorABC,
 )
 from contextlib import asynccontextmanager
-from http import HTTPStatus
 from ssl import SSLError
 from typing import Any, AsyncGenerator, AsyncIterator, Dict, get_type_hints, Optional

@ -26,7 +25,6 @@ import yaml
 from fastapi import Body, FastAPI, HTTPException, Request, Response
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
-from fastapi.routing import APIRoute
 from pydantic import BaseModel, ValidationError
 from termcolor import cprint
 from typing_extensions import Annotated
@ -287,15 +285,6 @@ def main(

    app = FastAPI()

-    # Health check is added to enable deploying the docker container image on Kubernetes which require
-    # a health check that can return 200 for readiness and liveness check
-    class HealthCheck(BaseModel):
-        status: str = "OK"
-
-    @app.get("/healthcheck", status_code=HTTPStatus.OK, response_model=HealthCheck)
-    async def healthcheck():
-        return HealthCheck(status="OK")
-
    impls, specs = asyncio.run(resolve_impls_with_routing(config))
    if Api.telemetry in impls:
        setup_logger(impls[Api.telemetry])
@ -307,6 +296,7 @@ def main(
    else:
        apis_to_serve = set(impls.keys())

+    apis_to_serve.add(Api.inspect)
    for api_str in apis_to_serve:
        api = Api(api_str)

@ -340,14 +330,11 @@ def main(
                    )
                )

-    for route in app.routes:
-        if isinstance(route, APIRoute):
-            cprint(
-                f"Serving {next(iter(route.methods))} {route.path}",
-                "white",
-                attrs=["bold"],
-            )
+        cprint(f"Serving API {api_str}", "white", attrs=["bold"])
+        for endpoint in endpoints:
+            cprint(f" {endpoint.method.upper()} {endpoint.route}", "white")

+    print("")
    app.exception_handler(RequestValidationError)(global_exception_handler)
    app.exception_handler(Exception)(global_exception_handler)
    signal.signal(signal.SIGINT, handle_sigint)