mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
feat: api level request metrics via middleware
add RequestMetricsMiddleware which tracks key metrics related to each request the LLS server will receive: 1. llama_stack_requests_total: tracks the total number of requests the server has processed 2. llama_stack_request_duration_seconds: tracks the duration of each request 3. llama_stack_concurrent_requests: tracks concurrently processed requests by the server The usage of a middleware allows this to be done on the server level without having to add custom handling to each router like the inference router has today for its API specific metrics. Also, add some unit tests for this functionality resolves #2597 Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
dbfc15123e
commit
49b729b30a
4 changed files with 433 additions and 0 deletions
|
@ -76,6 +76,7 @@ from llama_stack.providers.utils.telemetry.tracing import (
|
|||
)
|
||||
|
||||
from .auth import AuthenticationMiddleware
|
||||
from .metrics import RequestMetricsMiddleware
|
||||
from .quota import QuotaMiddleware
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
|
@ -536,6 +537,10 @@ def main(args: argparse.Namespace | None = None):
|
|||
app.__llama_stack_impls__ = impls
|
||||
app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
|
||||
|
||||
# Add request metrics middleware
|
||||
telemetry_impl = impls.get(Api.telemetry) if Api.telemetry in impls else None
|
||||
app.add_middleware(RequestMetricsMiddleware, telemetry=telemetry_impl)
|
||||
|
||||
import uvicorn
|
||||
|
||||
# Configure SSL if certificates are provided
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue