From e2cdb00a810d2c6dde0b837f8b83490dc3715602 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 22 Aug 2024 13:52:03 -0700 Subject: [PATCH 1/4] track api_call_start_time --- litellm/litellm_core_utils/litellm_logging.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index d59f98558..dbf2a7d3e 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -354,6 +354,8 @@ class Logging: str(e) ) ) + + self.model_call_details["api_call_start_time"] = datetime.datetime.now() # Input Integration Logging -> If you want to log the fact that an attempt to call the model was made callbacks = litellm.input_callback + self.dynamic_input_callbacks for callback in callbacks: From 06a362d35fb25c4c0ab7bd239fb765a50c48392e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 22 Aug 2024 13:58:10 -0700 Subject: [PATCH 2/4] track litellm_request_latency_metric --- litellm/integrations/prometheus.py | 38 ++++++++++++++++++++++++++++++ litellm/proxy/proxy_config.yaml | 4 +++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 1471f59b7..dadafa80e 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -60,6 +60,25 @@ class PrometheusLogger(CustomLogger): ], ) + # request latency metrics + self.litellm_request_latency_metric = Histogram( + "litellm_request_latency_metric", + "Total latency (seconds) for a request to LiteLLM", + labelnames=[ + "model", + "litellm_call_id", + ], + ) + + self.litellm_deployment_latency_metric = Histogram( + "litellm_deployment_latency_metric", + "Total latency (seconds) for a models LLM API call", + labelnames=[ + "model", + "litellm_call_id", + ], + ) + # Counter for spend self.litellm_spend_metric = Counter( "litellm_spend_metric", @@ -329,6 +348,25 @@ class 
PrometheusLogger(CustomLogger): user_api_key, user_api_key_alias, model_group ).set(remaining_tokens) + # latency metrics + total_time: timedelta = kwargs.get("end_time") - kwargs.get("start_time") + total_time_seconds = total_time.total_seconds() + api_call_total_time: timedelta = kwargs.get("end_time") - kwargs.get( + "api_call_start_time" + ) + + api_call_total_time_seconds = api_call_total_time.total_seconds() + + litellm_call_id = kwargs.get("litellm_call_id") + + self.litellm_request_latency_metric.labels(model, litellm_call_id).observe( + total_time_seconds + ) + + self.litellm_deployment_latency_metric.labels(model, litellm_call_id).observe( + api_call_total_time_seconds + ) + # set x-ratelimit headers if premium_user is True: self.set_llm_deployment_success_metrics( diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 65c7f7052..7c524eb18 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,7 +4,9 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - +litellm_settings: + success_callback: ["prometheus"] + failure_callback: ["prometheus"] guardrails: - guardrail_name: "lakera-pre-guard" litellm_params: From 36b550b8db404999829985271be52377145f87f4 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 22 Aug 2024 14:03:00 -0700 Subject: [PATCH 3/4] update promtheus metric names --- litellm/integrations/prometheus.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index dadafa80e..659e5b193 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -61,8 +61,8 @@ class PrometheusLogger(CustomLogger): ) # request latency metrics - self.litellm_request_latency_metric = Histogram( - "litellm_request_latency_metric", + self.litellm_request_total_latency_metric = Histogram( + 
"litellm_request_total_latency_metric", "Total latency (seconds) for a request to LiteLLM", labelnames=[ "model", @@ -70,8 +70,8 @@ class PrometheusLogger(CustomLogger): ], ) - self.litellm_deployment_latency_metric = Histogram( - "litellm_deployment_latency_metric", + self.litellm_llm_api_latency_metric = Histogram( + "litellm_llm_api_latency_metric", "Total latency (seconds) for a models LLM API call", labelnames=[ "model", @@ -359,11 +359,11 @@ class PrometheusLogger(CustomLogger): litellm_call_id = kwargs.get("litellm_call_id") - self.litellm_request_latency_metric.labels(model, litellm_call_id).observe( - total_time_seconds - ) + self.litellm_request_total_latency_metric.labels( + model, litellm_call_id + ).observe(total_time_seconds) - self.litellm_deployment_latency_metric.labels(model, litellm_call_id).observe( + self.litellm_llm_api_latency_metric.labels(model, litellm_call_id).observe( api_call_total_time_seconds ) From 57707b04b6993f9e375569fcc98caca8b9177177 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 22 Aug 2024 14:06:14 -0700 Subject: [PATCH 4/4] add prom docs for Request Latency Metrics --- docs/my-website/docs/proxy/prometheus.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md index 4b913d2e8..10e6456c2 100644 --- a/docs/my-website/docs/proxy/prometheus.md +++ b/docs/my-website/docs/proxy/prometheus.md @@ -68,6 +68,15 @@ http://localhost:4000/metrics | `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` | | `litellm_llm_api_failed_requests_metric` | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` | +### Request Latency Metrics + +| Metric Name | Description | +|----------------------|--------------------------------------| +| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels `litellm_call_id`, 
`model` | +| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model` | + + + ### LLM API / Provider Metrics | Metric Name | Description |