Litellm dev 01 01 2025 p1 (#7498)

* refactor(prometheus.py): refactor to remove `_tag` metrics and incorporate in regular metrics

* fix(prometheus.py): handle label values not set in enum values

* feat(prometheus.py): working e2e custom metadata labels

* docs(prometheus.md): update docs to clarify how custom metrics would work

* test(test_prometheus_unit_tests.py): fix test

* test: add unit testing
Authored by Krish Dholakia on 2025-01-01 18:59:28 -08:00, committed by GitHub
parent 0120176541
commit 07fc394072
6 changed files with 126 additions and 271 deletions
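
For context before the diff: a minimal, self-contained sketch of the labeling flow this commit moves to. The names here (`label_factory`, the sample label sets) are simplified stand-ins, not the actual LiteLLM classes; the real code resolves each metric's label set via `PrometheusMetricLabels.get_labels(...)` and merges values in `prometheus_label_factory`, as shown in the hunks below.

```python
from typing import Dict, List, Optional

def label_factory(
    supported_enum_labels: List[str],
    enum_values: Dict[str, Optional[str]],
    custom_metadata_labels: Optional[Dict[str, str]] = None,
) -> Dict[str, Optional[str]]:
    # Keep only the labels this metric supports.
    filtered = {k: v for k, v in enum_values.items() if k in supported_enum_labels}
    # Merge in custom metadata labels (e.g. "metadata_foo") when supported.
    for key, value in (custom_metadata_labels or {}).items():
        if key in supported_enum_labels:
            filtered[key] = value
    # Default any label without a value to None instead of raising
    # ("handle label values not set in enum values").
    for label in supported_enum_labels:
        if label not in filtered:
            filtered[label] = None
    return filtered

print(label_factory(
    supported_enum_labels=["model", "team", "metadata_foo"],
    enum_values={"model": "gpt-4", "user": "u1"},  # "user" unsupported, dropped
    custom_metadata_labels={"metadata_foo": "bar"},
))
# -> {'model': 'gpt-4', 'metadata_foo': 'bar', 'team': None}
```

The net effect of the refactor: instead of maintaining a parallel `*_by_tag` metric family for every metric, each metric resolves its label set once, and request tags and custom metadata labels ride along on the primary metric when its label set includes them.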


@@ -38,51 +38,35 @@ class PrometheusLogger(CustomLogger):
self.litellm_proxy_failed_requests_metric = Counter(
name="litellm_proxy_failed_requests_metric",
documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
labelnames=PrometheusMetricLabels.litellm_proxy_failed_requests_metric.value,
)
self.litellm_proxy_failed_requests_by_tag_metric = Counter(
name="litellm_proxy_failed_requests_by_tag_metric",
documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
labelnames=PrometheusMetricLabels.litellm_proxy_failed_requests_by_tag_metric.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_failed_requests_metric"
),
)
self.litellm_proxy_total_requests_metric = Counter(
name="litellm_proxy_total_requests_metric",
documentation="Total number of requests made to the proxy server - track number of client side requests",
labelnames=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
)
self.litellm_proxy_total_requests_by_tag_metric = Counter(
name="litellm_proxy_total_requests_by_tag_metric",
documentation="Total number of requests made to the proxy server - track number of client side requests by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_total_requests_metric"
),
)
# request latency metrics
self.litellm_request_total_latency_metric = Histogram(
"litellm_request_total_latency_metric",
"Total latency (seconds) for a request to LiteLLM",
labelnames=PrometheusMetricLabels.litellm_request_total_latency_metric.value,
buckets=LATENCY_BUCKETS,
)
self.litellm_request_total_latency_by_tag_metric = Histogram(
"litellm_request_total_latency_by_tag_metric",
"Total latency (seconds) for a request to LiteLLM by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_request_total_latency_by_tag_metric.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_request_total_latency_metric"
),
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_metric = Histogram(
"litellm_llm_api_latency_metric",
"Total latency (seconds) for a models LLM API call",
labelnames=PrometheusMetricLabels.litellm_llm_api_latency_metric.value,
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_by_tag_metric = Histogram(
"litellm_llm_api_latency_by_tag_metric",
"Total latency (seconds) for a models LLM API call by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_llm_api_latency_by_tag_metric.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_llm_api_latency_metric"
),
buckets=LATENCY_BUCKETS,
)
@@ -129,42 +113,20 @@ class PrometheusLogger(CustomLogger):
],
)
# Counter for tokens by tag
self.litellm_tokens_by_tag_metric = Counter(
"litellm_total_tokens_by_tag",
"Total number of input + output tokens from LLM requests by custom metadata tags",
labelnames=[
UserAPIKeyLabelNames.TAG.value,
],
)
self.litellm_input_tokens_metric = Counter(
"litellm_input_tokens",
"Total number of input tokens from LLM requests",
labelnames=PrometheusMetricLabels.litellm_input_tokens_metric.value,
)
# Counter for input tokens by tag
self.litellm_input_tokens_by_tag_metric = Counter(
"litellm_input_tokens_by_tag",
"Total number of input tokens from LLM requests by custom metadata tags",
labelnames=[
UserAPIKeyLabelNames.TAG.value,
],
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_input_tokens_metric"
),
)
self.litellm_output_tokens_metric = Counter(
"litellm_output_tokens",
"Total number of output tokens from LLM requests",
labelnames=PrometheusMetricLabels.litellm_output_tokens_metric.value,
)
# Counter for output tokens by tag
self.litellm_output_tokens_by_tag_metric = Counter(
"litellm_output_tokens_by_tag",
"Total number of output tokens from LLM requests by custom metadata tags",
labelnames=[
UserAPIKeyLabelNames.TAG.value,
],
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_output_tokens_metric"
),
)
# Remaining Budget for Team
@@ -301,36 +263,25 @@ class PrometheusLogger(CustomLogger):
self.litellm_deployment_latency_per_output_token = Histogram(
name="litellm_deployment_latency_per_output_token",
documentation="LLM Deployment Analytics - Latency per output token",
labelnames=PrometheusMetricLabels.litellm_deployment_latency_per_output_token.value,
)
self.litellm_deployment_latency_per_output_token_by_tag = Histogram(
name="litellm_deployment_latency_per_output_token_by_tag",
documentation="LLM Deployment Analytics - Latency per output token by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_deployment_latency_per_output_token_by_tag.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_deployment_latency_per_output_token"
),
)
self.litellm_deployment_successful_fallbacks = Counter(
"litellm_deployment_successful_fallbacks",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
PrometheusMetricLabels.litellm_deployment_successful_fallbacks.value,
)
self.litellm_deployment_successful_fallbacks_by_tag = Counter(
"litellm_deployment_successful_fallbacks_by_tag",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model by custom metadata tags",
PrometheusMetricLabels.litellm_deployment_successful_fallbacks_by_tag.value,
PrometheusMetricLabels.get_labels(
"litellm_deployment_successful_fallbacks"
),
)
self.litellm_deployment_failed_fallbacks = Counter(
"litellm_deployment_failed_fallbacks",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
PrometheusMetricLabels.litellm_deployment_failed_fallbacks.value,
)
self.litellm_deployment_failed_fallbacks_by_tag = Counter(
"litellm_deployment_failed_fallbacks_by_tag",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model by custom metadata tags",
PrometheusMetricLabels.litellm_deployment_failed_fallbacks_by_tag.value,
PrometheusMetricLabels.get_labels(
"litellm_deployment_failed_fallbacks"
),
)
self.litellm_llm_api_failed_requests_metric = Counter(
@@ -350,7 +301,9 @@ class PrometheusLogger(CustomLogger):
self.litellm_requests_metric = Counter(
name="litellm_requests_metric",
documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
labelnames=PrometheusMetricLabels.litellm_requests_metric.value,
labelnames=PrometheusMetricLabels.get_labels(
label_name="litellm_requests_metric"
),
)
except Exception as e:
@@ -399,9 +352,7 @@ class PrometheusLogger(CustomLogger):
if standard_logging_payload is not None and isinstance(
standard_logging_payload, dict
):
_tags = get_tags_from_standard_logging_payload(
cast(StandardLoggingPayload, standard_logging_payload)
)
_tags = standard_logging_payload["request_tags"]
else:
_tags = []
@@ -426,6 +377,10 @@ class PrometheusLogger(CustomLogger):
api_provider=standard_logging_payload["custom_llm_provider"],
exception_status=None,
exception_class=None,
custom_metadata_labels=get_custom_labels_from_metadata(
metadata=standard_logging_payload["metadata"].get("requester_metadata")
or {}
),
)
if (
@@ -507,19 +462,13 @@ class PrometheusLogger(CustomLogger):
standard_logging_payload["stream"] is True
): # log successful streaming requests from logging event hook.
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_total_requests_metric"
),
enum_values=enum_values,
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
def _increment_token_metrics(
self,
standard_logging_payload: StandardLoggingPayload,
@@ -546,31 +495,22 @@ class PrometheusLogger(CustomLogger):
if standard_logging_payload is not None and isinstance(
standard_logging_payload, dict
):
_tags = get_tags_from_standard_logging_payload(standard_logging_payload)
for tag in _tags:
self.litellm_tokens_by_tag_metric.labels(
**{
UserAPIKeyLabelNames.TAG.value: tag,
}
).inc(standard_logging_payload["total_tokens"])
_tags = standard_logging_payload["request_tags"]
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_input_tokens_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_input_tokens_metric"
),
enum_values=enum_values,
)
self.litellm_input_tokens_metric.labels(**_labels).inc(
standard_logging_payload["prompt_tokens"]
)
for tag in _tags:
self.litellm_input_tokens_by_tag_metric.labels(
**{
UserAPIKeyLabelNames.TAG.value: tag,
}
).inc(standard_logging_payload["prompt_tokens"])
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_output_tokens_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_output_tokens_metric"
),
enum_values=enum_values,
)
@@ -578,13 +518,6 @@ class PrometheusLogger(CustomLogger):
standard_logging_payload["completion_tokens"]
)
for tag in _tags:
self.litellm_output_tokens_by_tag_metric.labels(
**{
UserAPIKeyLabelNames.TAG.value: tag,
}
).inc(standard_logging_payload["completion_tokens"])
def _increment_remaining_budget_metrics(
self,
user_api_team: Optional[str],
@@ -634,7 +567,9 @@ class PrometheusLogger(CustomLogger):
enum_values: UserAPIKeyLabelValues,
):
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_requests_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_requests_metric"
),
enum_values=enum_values,
)
self.litellm_requests_metric.labels(**_labels).inc()
@@ -728,44 +663,29 @@ class PrometheusLogger(CustomLogger):
api_call_total_time: timedelta = end_time - api_call_start_time
api_call_total_time_seconds = api_call_total_time.total_seconds()
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_llm_api_latency_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_llm_api_latency_metric"
),
enum_values=enum_values,
)
self.litellm_llm_api_latency_metric.labels(**_labels).observe(
api_call_total_time_seconds
)
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_llm_api_latency_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_llm_api_latency_by_tag_metric.labels(**_labels).observe(
api_call_total_time_seconds
)
# total request latency
if start_time is not None and isinstance(start_time, datetime):
total_time: timedelta = end_time - start_time
total_time_seconds = total_time.total_seconds()
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_request_total_latency_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_request_total_latency_metric"
),
enum_values=enum_values,
)
self.litellm_request_total_latency_metric.labels(**_labels).observe(
total_time_seconds
)
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_request_total_latency_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_request_total_latency_by_tag_metric.labels(
**_labels
).observe(total_time_seconds)
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
from litellm.types.utils import StandardLoggingPayload
@@ -831,12 +751,6 @@ class PrometheusLogger(CustomLogger):
"""
try:
_tags = cast(List[str], request_data.get("tags") or [])
request_metadata = request_data.get("metadata", {})
metadata_tags: Optional[List[str]] = None
if request_metadata is not None and isinstance(request_metadata, dict):
metadata_tags = get_tag_from_metadata(metadata=request_metadata)
if metadata_tags is not None:
_tags.extend(metadata_tags)
enum_values = UserAPIKeyLabelValues(
end_user=user_api_key_dict.end_user_id,
user=user_api_key_dict.user_id,
@@ -851,32 +765,21 @@ class PrometheusLogger(CustomLogger):
tags=_tags,
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_failed_requests_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_failed_requests_metric"
),
enum_values=enum_values,
)
self.litellm_proxy_failed_requests_metric.labels(**_labels).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_failed_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_failed_requests_by_tag_metric.labels(**_labels).inc()
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_total_requests_metric"
),
enum_values=enum_values,
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
@@ -901,18 +804,13 @@ class PrometheusLogger(CustomLogger):
status_code="200",
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_proxy_total_requests_metric"
),
enum_values=enum_values,
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
@@ -979,9 +877,7 @@ class PrometheusLogger(CustomLogger):
if standard_logging_payload is not None and isinstance(
standard_logging_payload, dict
):
_tags = get_tags_from_standard_logging_payload(
cast(StandardLoggingPayload, standard_logging_payload)
)
_tags = standard_logging_payload["request_tags"]
for tag in _tags:
self.litellm_deployment_failure_by_tag_responses.labels(
**{
@@ -1161,7 +1057,9 @@ class PrometheusLogger(CustomLogger):
if output_tokens is not None and output_tokens > 0:
latency_per_token = _latency_seconds / output_tokens
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_latency_per_output_token.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_deployment_latency_per_output_token"
),
enum_values=enum_values,
)
self.litellm_deployment_latency_per_output_token.labels(
@@ -1215,19 +1113,13 @@ class PrometheusLogger(CustomLogger):
tags=_tags,
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_successful_fallbacks.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_deployment_successful_fallbacks"
),
enum_values=enum_values,
)
self.litellm_deployment_successful_fallbacks.labels(**_labels).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_successful_fallbacks_by_tag.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_deployment_successful_fallbacks_by_tag.labels(**_labels).inc()
async def log_failure_fallback_event(
self, original_model_group: str, kwargs: dict, original_exception: Exception
):
@@ -1266,19 +1158,13 @@ class PrometheusLogger(CustomLogger):
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_failed_fallbacks.value,
supported_enum_labels=PrometheusMetricLabels.get_labels(
label_name="litellm_deployment_failed_fallbacks"
),
enum_values=enum_values,
)
self.litellm_deployment_failed_fallbacks.labels(**_labels).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_failed_fallbacks_by_tag.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_deployment_failed_fallbacks_by_tag.labels(**_labels).inc()
def set_litellm_deployment_state(
self,
state: int,
@@ -1384,44 +1270,37 @@ def prometheus_label_factory(
if label in supported_enum_labels
}
if tag and "tag" in supported_enum_labels:
filtered_labels["tag"] = tag
if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
filtered_labels["end_user"] = get_end_user_id_for_cost_tracking(
litellm_params={"user_api_key_end_user_id": enum_values.end_user},
service_type="prometheus",
)
if enum_values.custom_metadata_labels is not None:
for key, value in enum_values.custom_metadata_labels.items():
if key in supported_enum_labels:
filtered_labels[key] = value
for label in supported_enum_labels:
if label not in filtered_labels:
filtered_labels[label] = None
return filtered_labels
def get_tags_from_standard_logging_payload(
standard_logging_payload: StandardLoggingPayload,
) -> List[str]:
_tags = standard_logging_payload["request_tags"]
_requester_metadata = standard_logging_payload["metadata"].get("requester_metadata")
metadata_tags: Optional[List[str]] = None
if _requester_metadata is not None:
metadata_tags = get_tag_from_metadata(metadata=_requester_metadata)
if metadata_tags is not None:
_tags.extend(metadata_tags)
return _tags
def get_tag_from_metadata(metadata: dict) -> Optional[List[str]]:
def get_custom_labels_from_metadata(metadata: dict) -> Dict[str, str]:
"""
Get tag from metadata
Get custom labels from metadata
"""
keys = litellm.custom_prometheus_metadata_labels
if keys is None or len(keys) == 0:
return None
return {}
result: List[str] = []
result: Dict[str, str] = {}
for key in keys:
# Split the dot notation key into parts
original_key = key
key = key.replace("metadata.", "", 1) if key.startswith("metadata.") else key
keys_parts = key.split(".")
@@ -1433,6 +1312,6 @@ def get_tag_from_metadata(metadata: dict) -> Optional[List[str]]:
break
if value is not None and isinstance(value, str):
result.append(value)
result[original_key.replace(".", "_")] = value
return result
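
A quick sanity check of the new helper (a hedged sketch: the import path assumes the function stays module-level in `litellm/integrations/prometheus.py`, and that the nested-dict walk elided at the hunk boundary above behaves as shown):

```python
import litellm
from litellm.integrations.prometheus import get_custom_labels_from_metadata

# Dot-notation keys are resolved against the request's requester_metadata;
# dots become underscores in the emitted label names, since Prometheus
# label names may not contain ".".
litellm.custom_prometheus_metadata_labels = ["metadata.foo", "metadata.user.tier"]

labels = get_custom_labels_from_metadata(
    metadata={"foo": "bar", "user": {"tier": "paid"}}
)
assert labels == {"metadata_foo": "bar", "metadata_user_tier": "paid"}
```

Only string values are emitted; a missing or non-string value simply drops that label rather than erroring.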