fix litellm_overhead_latency_metric

This commit is contained in:
Ishaan Jaff 2025-01-21 21:33:31 -08:00
parent 3f053fc99c
commit a57b8f6802
2 changed files with 4 additions and 1 deletion


@@ -135,6 +135,7 @@ Use this for LLM API Error monitoring and tracking remaining rate limits and tok
 | Metric Name | Description |
 |----------------------|--------------------------------------|
 | `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model" |
+| `litellm_overhead_latency_metric` | Latency overhead (seconds) added by LiteLLM processing - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model" |
 | `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels "model", "hashed_api_key", "api_key_alias", "team", "team_alias", "requested_model", "end_user", "user" |
 | `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` [Note: only emitted for streaming requests] |
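
The latency metrics above are related: total request latency is roughly LiteLLM's processing overhead plus the upstream LLM API latency, all reported in seconds. A quick sketch of that decomposition (the sample values and the additive relation are illustrative assumptions, not values pulled from LiteLLM):

```python
# Illustrative samples, all in seconds to match the metrics' documented unit.
llm_api_latency_s = 1.80   # e.g. a litellm_llm_api_latency_metric observation
overhead_s = 0.05          # e.g. a litellm_overhead_latency_metric observation

# Roughly what litellm_request_total_latency_metric would observe.
total_latency_s = llm_api_latency_s + overhead_s
print(round(total_latency_s, 2))  # 1.85
```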


@@ -1011,7 +1011,9 @@ class PrometheusLogger(CustomLogger):
             litellm_model_name,
             standard_logging_payload["metadata"]["user_api_key_hash"],
             standard_logging_payload["metadata"]["user_api_key_alias"],
-        ).observe(litellm_overhead_time_ms)
+        ).observe(
+            litellm_overhead_time_ms / 1000
+        )  # set as seconds
         if remaining_requests:
             """