(feat) prometheus: use well-defined latency buckets (#6211)

* fix: give prometheus well-defined latency buckets

* use a well-defined latency bucket distribution

* use types file for prometheus logging

* add test for LATENCY_BUCKETS
Ishaan Jaff 2024-10-14 17:16:01 +05:30 committed by GitHub
parent 4d1b4beb3d
commit 603299e3c8
4 changed files with 99 additions and 11 deletions


@@ -16,13 +16,9 @@ import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.prometheus import *
from litellm.types.utils import StandardLoggingPayload
REQUESTED_MODEL = "requested_model"
EXCEPTION_STATUS = "exception_status"
EXCEPTION_CLASS = "exception_class"
EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
class PrometheusLogger(CustomLogger):
# Class variables or attributes
@@ -85,6 +81,7 @@ class PrometheusLogger(CustomLogger):
"team",
"team_alias",
],
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_metric = Histogram(
@@ -97,6 +94,7 @@ class PrometheusLogger(CustomLogger):
"team",
"team_alias",
],
buckets=LATENCY_BUCKETS,
)
# Counter for spend

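For context on the change above: the prometheus_client Histogram constructor accepts a buckets argument, and without it the library falls back to its generic default bucket boundaries. The sketch below is illustrative only (hypothetical metric and label names, not the ones LiteLLM registers) and shows how explicit buckets surface as le labels in the scraped output:

# Minimal sketch (illustrative names): a prometheus_client Histogram with
# explicit buckets. Observations are counted in every bucket whose upper
# bound is >= the observed value, and float("inf") is exposed as le="+Inf".
from prometheus_client import CollectorRegistry, Histogram, generate_latest

registry = CollectorRegistry()
latency_histogram = Histogram(
    "example_request_latency_seconds",  # hypothetical metric name
    "Latency of example requests in seconds",
    labelnames=["model"],
    buckets=(0.05, 0.1, 0.5, 1.0, 5.0, float("inf")),
    registry=registry,
)

latency_histogram.labels(model="demo-model").observe(0.3)

# Prints one *_bucket series per configured upper bound, each carrying an le label.
print(generate_latest(registry).decode())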

@@ -1,12 +1,12 @@
model_list:
- model_name: db-openai-endpoint
- model_name: fake-openai-endpoint
litellm_params:
model: openai/gpt-4
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
litellm_settings:
success_callback: ["s3"]
callbacks: ["prometheus"]
turn_off_message_logging: true
s3_callback_params:
s3_bucket_name: load-testing-oct # AWS Bucket Name for S3


@@ -0,0 +1,42 @@
REQUESTED_MODEL = "requested_model"
EXCEPTION_STATUS = "exception_status"
EXCEPTION_CLASS = "exception_class"
EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
LATENCY_BUCKETS = (
0.005,
0.00625,
0.0125,
0.025,
0.05,
0.1,
0.5,
1.0,
1.5,
2.0,
2.5,
3.0,
3.5,
4.0,
4.5,
5.0,
5.5,
6.0,
6.5,
7.0,
7.5,
8.0,
8.5,
9.0,
9.5,
10.0,
15.0,
20.0,
25.0,
30.0,
60.0,
120.0,
180.0,
240.0,
300.0,
float("inf"),
)

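A brief note on how to read this tuple: Prometheus histogram buckets are cumulative upper bounds (the le label), not disjoint ranges, so an observation is counted in every bucket whose bound is greater than or equal to it. The snippet below is a small illustration only; it assumes the new litellm.types.integrations.prometheus module is importable, and the helper name is made up:

# Illustrative helper (not part of the commit): find the smallest configured
# upper bound that still contains a given latency, using the new constant.
import bisect

from litellm.types.integrations.prometheus import LATENCY_BUCKETS

def smallest_bucket(latency_seconds: float) -> float:
    """Return the tightest LATENCY_BUCKETS upper bound for an observation."""
    return LATENCY_BUCKETS[bisect.bisect_left(LATENCY_BUCKETS, latency_seconds)]

print(smallest_bucket(0.007))   # 0.0125
print(smallest_bucket(45.0))    # 60.0
print(smallest_bucket(1000.0))  # inf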

@@ -6,6 +6,12 @@ import pytest
import aiohttp
import asyncio
import uuid
import os
import sys
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
async def make_bad_chat_completion_request(session, key):
@@ -148,10 +154,52 @@ async def test_proxy_success_metrics():
in metrics
)
# assert (
# 'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
# in metrics
# )
verify_latency_metrics(metrics)
def verify_latency_metrics(metrics: str):
"""
Assert that LATENCY_BUCKETS distribution is used for
- litellm_request_total_latency_metric_bucket
- litellm_llm_api_latency_metric_bucket
"""
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
import re
metric_names = [
"litellm_request_total_latency_metric_bucket",
"litellm_llm_api_latency_metric_bucket",
]
for metric_name in metric_names:
# Extract all 'le' values for the current metric
pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
le_values = re.findall(pattern, metrics)
# Convert to set for easier comparison
actual_buckets = set(le_values)
print("actual_buckets", actual_buckets)
expected_buckets = []
for bucket in LATENCY_BUCKETS:
expected_buckets.append(str(bucket))
# replace inf with +Inf
expected_buckets = [
bucket.replace("inf", "+Inf") for bucket in expected_buckets
]
print("expected_buckets", expected_buckets)
expected_buckets = set(expected_buckets)
# Verify all expected buckets are present
assert (
actual_buckets == expected_buckets
), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"
@pytest.mark.asyncio