forked from phoenix/litellm-mirror

(feat) prometheus have well-defined latency buckets (#6211)

* fix: prometheus should have well-defined latency buckets
* use a well-defined latency bucket set
* use a types file for prometheus logging
* add a test for LATENCY_BUCKETS

This commit is contained in:
parent 4d1b4beb3d
commit 603299e3c8

4 changed files with 99 additions and 11 deletions
Prometheus logger integration:

@@ -16,13 +16,9 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.integrations.prometheus import *
 from litellm.types.utils import StandardLoggingPayload

-REQUESTED_MODEL = "requested_model"
-EXCEPTION_STATUS = "exception_status"
-EXCEPTION_CLASS = "exception_class"
-EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
-

 class PrometheusLogger(CustomLogger):
     # Class variables or attributes
@@ -85,6 +81,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )

         self.litellm_llm_api_latency_metric = Histogram(

@@ -97,6 +94,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )

         # Counter for spend
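For readers unfamiliar with prometheus_client, here is a minimal sketch of what passing buckets= to a Histogram does; the metric name and label are illustrative, not the exact ones from this commit:

    from prometheus_client import Histogram

    # Pin the bucket boundaries instead of relying on prometheus_client's
    # defaults (0.005s .. 10s, +Inf), which top out too low for slow LLM calls.
    LATENCY_BUCKETS = (0.005, 0.1, 1.0, 5.0, 30.0, 300.0, float("inf"))

    request_latency = Histogram(
        "example_request_latency_seconds",  # illustrative metric name
        "End-to-end request latency in seconds",
        labelnames=["model"],
        buckets=LATENCY_BUCKETS,
    )

    # An observation is counted in every cumulative bucket whose le >= the value.
    request_latency.labels(model="fake").observe(2.3)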
Proxy test config:

@@ -1,12 +1,12 @@
 model_list:
-  - model_name: db-openai-endpoint
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4
+      model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/

 litellm_settings:
-  success_callback: ["s3"]
+  callbacks: ["prometheus"]
   turn_off_message_logging: true
   s3_callback_params:
     s3_bucket_name: load-testing-oct # AWS Bucket Name for S3
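With the prometheus callback enabled, the proxy serves its metrics as plain text over HTTP, which is presumably what the test further down scrapes. A hedged sketch of fetching that output; the base URL, port, and /metrics path are assumptions about a local deployment:

    import asyncio

    import aiohttp

    async def fetch_metrics(base_url: str = "http://localhost:4000") -> str:
        # Base URL and /metrics path are assumed, not taken from this commit.
        async with aiohttp.ClientSession() as session:
            async with session.get(f"{base_url}/metrics") as resp:
                resp.raise_for_status()
                return await resp.text()

    if __name__ == "__main__":
        text = asyncio.run(fetch_metrics())
        # Print only the histogram bucket lines for the latency metrics.
        for line in text.splitlines():
            if "latency_metric_bucket" in line:
                print(line)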
litellm/types/integrations/prometheus.py (new file, 42 lines):

@@ -0,0 +1,42 @@
+REQUESTED_MODEL = "requested_model"
+EXCEPTION_STATUS = "exception_status"
+EXCEPTION_CLASS = "exception_class"
+EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
+LATENCY_BUCKETS = (
+    0.005,
+    0.00625,
+    0.0125,
+    0.025,
+    0.05,
+    0.1,
+    0.5,
+    1.0,
+    1.5,
+    2.0,
+    2.5,
+    3.0,
+    3.5,
+    4.0,
+    4.5,
+    5.0,
+    5.5,
+    6.0,
+    6.5,
+    7.0,
+    7.5,
+    8.0,
+    8.5,
+    9.0,
+    9.5,
+    10.0,
+    15.0,
+    20.0,
+    25.0,
+    30.0,
+    60.0,
+    120.0,
+    180.0,
+    240.0,
+    300.0,
+    float("inf"),
+)
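One detail that matters for the test below: in the Prometheus exposition format each bucket boundary becomes an le label, and float("inf") is rendered as +Inf. A small throwaway sketch showing that rendering (the metric here is illustrative, not from this commit):

    from prometheus_client import CollectorRegistry, Histogram, generate_latest

    registry = CollectorRegistry()
    demo = Histogram(
        "demo_latency_seconds",  # throwaway metric name
        "demo histogram",
        registry=registry,
        buckets=(0.5, 5.0, float("inf")),
    )
    demo.observe(1.2)

    # Output contains one *_bucket line per boundary, e.g.:
    #   demo_latency_seconds_bucket{le="0.5"} 0.0
    #   demo_latency_seconds_bucket{le="5.0"} 1.0
    #   demo_latency_seconds_bucket{le="+Inf"} 1.0
    print(generate_latest(registry).decode())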
Prometheus proxy tests:

@@ -6,6 +6,12 @@ import pytest
 import aiohttp
 import asyncio
 import uuid
+import os
+import sys
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path


 async def make_bad_chat_completion_request(session, key):
@@ -148,10 +154,52 @@ async def test_proxy_success_metrics():
         in metrics
     )

-    assert (
-        'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
-        in metrics
-    )
+    # assert (
+    #     'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
+    #     in metrics
+    # )
+
+    verify_latency_metrics(metrics)
+
+
+def verify_latency_metrics(metrics: str):
+    """
+    Assert that the LATENCY_BUCKETS distribution is used for
+    - litellm_request_total_latency_metric_bucket
+    - litellm_llm_api_latency_metric_bucket
+    """
+    from litellm.types.integrations.prometheus import LATENCY_BUCKETS
+    import re
+
+    metric_names = [
+        "litellm_request_total_latency_metric_bucket",
+        "litellm_llm_api_latency_metric_bucket",
+    ]
+
+    for metric_name in metric_names:
+        # Extract all 'le' values for the current metric
+        pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
+        le_values = re.findall(pattern, metrics)
+
+        # Convert to a set for easier comparison
+        actual_buckets = set(le_values)
+
+        print("actual_buckets", actual_buckets)
+        expected_buckets = []
+        for bucket in LATENCY_BUCKETS:
+            expected_buckets.append(str(bucket))
+
+        # replace inf with +Inf, matching the exposition format
+        expected_buckets = [
+            bucket.replace("inf", "+Inf") for bucket in expected_buckets
+        ]

+        print("expected_buckets", expected_buckets)
+        expected_buckets = set(expected_buckets)
+        # Verify all expected buckets are present
+        assert (
+            actual_buckets == expected_buckets
+        ), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"


 @pytest.mark.asyncio
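A self-contained illustration of the le-extraction regex used in verify_latency_metrics, run against a hand-written metrics snippet (the sample lines are illustrative, not real proxy output):

    import re

    sample_metrics = '''
    litellm_request_total_latency_metric_bucket{le="0.005",team="None"} 0.0
    litellm_request_total_latency_metric_bucket{le="5.0",team="None"} 2.0
    litellm_request_total_latency_metric_bucket{le="+Inf",team="None"} 3.0
    '''

    metric_name = "litellm_request_total_latency_metric_bucket"
    # {{ and }} escape literal braces inside the f-string-style raw pattern.
    pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
    print(re.findall(pattern, sample_metrics))  # ['0.005', '5.0', '+Inf']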