(feat) prometheus: use well-defined latency buckets (#6211)

* fix: give prometheus well-defined latency buckets

* use a well-defined latency bucket distribution

* use types file for prometheus logging

* add test for LATENCY_BUCKETS
Ishaan Jaff 2024-10-14 17:16:01 +05:30 committed by GitHub
parent 4d1b4beb3d
commit 603299e3c8
4 changed files with 99 additions and 11 deletions


@@ -16,13 +16,9 @@ import litellm
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.integrations.prometheus import *
from litellm.types.utils import StandardLoggingPayload
REQUESTED_MODEL = "requested_model"
EXCEPTION_STATUS = "exception_status"
EXCEPTION_CLASS = "exception_class"
EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
class PrometheusLogger(CustomLogger):
# Class variables or attributes
@@ -85,6 +81,7 @@ class PrometheusLogger(CustomLogger):
"team",
"team_alias",
],
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_metric = Histogram(
@@ -97,6 +94,7 @@ class PrometheusLogger(CustomLogger):
"team",
"team_alias",
],
buckets=LATENCY_BUCKETS,
)
# Counter for spend

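For context on the change above: the prometheus_client Histogram constructor accepts a buckets argument, and without it the library falls back to its generic default bucket boundaries. The sketch below is illustrative only (hypothetical metric and label names, not the ones LiteLLM registers) and shows how explicit buckets surface as le labels in the scraped output:

# Minimal sketch (illustrative names): a prometheus_client Histogram with
# explicit buckets. Observations are counted in every bucket whose upper
# bound is >= the observed value, and float("inf") is exposed as le="+Inf".
from prometheus_client import CollectorRegistry, Histogram, generate_latest

registry = CollectorRegistry()
latency_histogram = Histogram(
    "example_request_latency_seconds",  # hypothetical metric name
    "Latency of example requests in seconds",
    labelnames=["model"],
    buckets=(0.05, 0.1, 0.5, 1.0, 5.0, float("inf")),
    registry=registry,
)

latency_histogram.labels(model="demo-model").observe(0.3)

# Prints one *_bucket series per configured upper bound, each carrying an le label.
print(generate_latest(registry).decode())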

@@ -1,12 +1,12 @@
model_list:
- model_name: db-openai-endpoint
- model_name: fake-openai-endpoint
litellm_params:
model: openai/gpt-4
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
litellm_settings:
success_callback: ["s3"]
callbacks: ["prometheus"]
turn_off_message_logging: true
s3_callback_params:
s3_bucket_name: load-testing-oct # AWS Bucket Name for S3


@@ -0,0 +1,42 @@
REQUESTED_MODEL = "requested_model"
EXCEPTION_STATUS = "exception_status"
EXCEPTION_CLASS = "exception_class"
EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
LATENCY_BUCKETS = (
0.005,
0.00625,
0.0125,
0.025,
0.05,
0.1,
0.5,
1.0,
1.5,
2.0,
2.5,
3.0,
3.5,
4.0,
4.5,
5.0,
5.5,
6.0,
6.5,
7.0,
7.5,
8.0,
8.5,
9.0,
9.5,
10.0,
15.0,
20.0,
25.0,
30.0,
60.0,
120.0,
180.0,
240.0,
300.0,
float("inf"),
)

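A brief note on how to read this tuple: Prometheus histogram buckets are cumulative upper bounds (the le label), not disjoint ranges, so an observation is counted in every bucket whose bound is greater than or equal to it. The snippet below is a small illustration only; it assumes the new litellm.types.integrations.prometheus module is importable, and the helper name is made up:

# Illustrative helper (not part of the commit): find the smallest configured
# upper bound that still contains a given latency, using the new constant.
import bisect

from litellm.types.integrations.prometheus import LATENCY_BUCKETS

def smallest_bucket(latency_seconds: float) -> float:
    """Return the tightest LATENCY_BUCKETS upper bound for an observation."""
    return LATENCY_BUCKETS[bisect.bisect_left(LATENCY_BUCKETS, latency_seconds)]

print(smallest_bucket(0.007))   # 0.0125
print(smallest_bucket(45.0))    # 60.0
print(smallest_bucket(1000.0))  # inf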

@@ -6,6 +6,12 @@ import pytest
import aiohttp
import asyncio
import uuid
import os
import sys
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
async def make_bad_chat_completion_request(session, key):
@@ -148,10 +154,52 @@ async def test_proxy_success_metrics():
in metrics
)
# assert (
# 'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
# in metrics
# )
verify_latency_metrics(metrics)
def verify_latency_metrics(metrics: str):
"""
Assert that LATENCY_BUCKETS distribution is used for
- litellm_request_total_latency_metric_bucket
- litellm_llm_api_latency_metric_bucket
"""
from litellm.types.integrations.prometheus import LATENCY_BUCKETS
import re
metric_names = [
"litellm_request_total_latency_metric_bucket",
"litellm_llm_api_latency_metric_bucket",
]
for metric_name in metric_names:
# Extract all 'le' values for the current metric
pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
le_values = re.findall(pattern, metrics)
# Convert to set for easier comparison
actual_buckets = set(le_values)
print("actual_buckets", actual_buckets)
expected_buckets = []
for bucket in LATENCY_BUCKETS:
expected_buckets.append(str(bucket))
# replace inf with +Inf
expected_buckets = [
bucket.replace("inf", "+Inf") for bucket in expected_buckets
]
print("expected_buckets", expected_buckets)
expected_buckets = set(expected_buckets)
# Verify all expected buckets are present
assert (
actual_buckets == expected_buckets
), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"
@pytest.mark.asyncio