diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index b971a4e5e..f67caa405 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -16,13 +16,9 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.integrations.prometheus import *
 from litellm.types.utils import StandardLoggingPayload
 
-REQUESTED_MODEL = "requested_model"
-EXCEPTION_STATUS = "exception_status"
-EXCEPTION_CLASS = "exception_class"
-EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
-
 
 class PrometheusLogger(CustomLogger):
     # Class variables or attributes
@@ -85,6 +81,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )
 
         self.litellm_llm_api_latency_metric = Histogram(
@@ -97,6 +94,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )
 
         # Counter for spend
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 11ccc8561..c71e3fd40 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: db-openai-endpoint
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4
+      model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 litellm_settings:
-  success_callback: ["s3"]
+  callbacks: ["prometheus"]
   turn_off_message_logging: true
   s3_callback_params:
     s3_bucket_name: load-testing-oct # AWS Bucket Name for S3
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
new file mode 100644
index 000000000..d09ed9670
--- /dev/null
+++ b/litellm/types/integrations/prometheus.py
@@ -0,0 +1,42 @@
+REQUESTED_MODEL = "requested_model"
+EXCEPTION_STATUS = "exception_status"
+EXCEPTION_CLASS = "exception_class"
+EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
+LATENCY_BUCKETS = (
+    0.005,
+    0.00625,
+    0.0125,
+    0.025,
+    0.05,
+    0.1,
+    0.5,
+    1.0,
+    1.5,
+    2.0,
+    2.5,
+    3.0,
+    3.5,
+    4.0,
+    4.5,
+    5.0,
+    5.5,
+    6.0,
+    6.5,
+    7.0,
+    7.5,
+    8.0,
+    8.5,
+    9.0,
+    9.5,
+    10.0,
+    15.0,
+    20.0,
+    25.0,
+    30.0,
+    60.0,
+    120.0,
+    180.0,
+    240.0,
+    300.0,
+    float("inf"),
+)
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 9bda4cd10..0de1c9896 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -6,6 +6,12 @@ import pytest
 import aiohttp
 import asyncio
 import uuid
+import os
+import sys
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 
 
 async def make_bad_chat_completion_request(session, key):
@@ -148,10 +154,52 @@ async def test_proxy_success_metrics():
         in metrics
     )
 
+    # assert (
+    #     'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
+    #     in metrics
+    # )
+
+    verify_latency_metrics(metrics)
+
+
+def verify_latency_metrics(metrics: str):
+    """
+    Assert that LATENCY_BUCKETS distribution is used for
+    - litellm_request_total_latency_metric_bucket
+    - litellm_llm_api_latency_metric_bucket
+    """
+    from litellm.types.integrations.prometheus import LATENCY_BUCKETS
+    import re
+
+    metric_names = [
+        "litellm_request_total_latency_metric_bucket",
+        "litellm_llm_api_latency_metric_bucket",
+    ]
+
+    for metric_name in metric_names:
+        # Extract all 'le' values for the current metric
+        pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
+        le_values = re.findall(pattern, metrics)
+
+        # Convert to set for easier comparison
+        actual_buckets = set(le_values)
+
+        print("actual_buckets", actual_buckets)
+        expected_buckets = []
+        for bucket in LATENCY_BUCKETS:
+            expected_buckets.append(str(bucket))
+
+        # replace inf with +Inf
+        expected_buckets = [
+            bucket.replace("inf", "+Inf") for bucket in expected_buckets
+        ]
+
+        print("expected_buckets", expected_buckets)
+        expected_buckets = set(expected_buckets)
+        # Verify all expected buckets are present
     assert (
-        'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
-        in metrics
-    )
+            actual_buckets == expected_buckets
+        ), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"
 
 
 @pytest.mark.asyncio
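
Standalone sketch (not part of the patch): how prometheus_client turns a custom buckets tuple into the le-labelled *_bucket series that verify_latency_metrics() scrapes from /metrics. The metric name and the shortened DEMO_BUCKETS tuple below are illustrative; only the buckets= keyword argument mirrors the change in PrometheusLogger.

# Illustrative only: demonstrates the prometheus_client `buckets=` behaviour
# relied on by this patch; names and values here are not LiteLLM's.
from prometheus_client import Histogram, generate_latest

DEMO_BUCKETS = (0.005, 0.00625, 0.0125, 0.05, 0.1, 0.5, 1.0, 5.0, float("inf"))

demo_latency = Histogram(
    "demo_request_latency_seconds",
    "Demo request latency (seconds)",
    labelnames=["model"],
    buckets=DEMO_BUCKETS,
)

demo_latency.labels(model="fake").observe(0.42)

# Each bucket is exposed as demo_request_latency_seconds_bucket{le="<bound>"},
# with float("inf") rendered as le="+Inf" - exactly the text the test's regex
# extracts and compares against LATENCY_BUCKETS.
print(generate_latest().decode())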