Litellm dev 12 26 2024 p3 (#7434)

* build(model_prices_and_context_window.json): update groq models to specify 'supports_vision' parameter

Closes https://github.com/BerriAI/litellm/issues/7433

* docs(groq.md): add groq vision example to docs

Closes https://github.com/BerriAI/litellm/issues/7433
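
A minimal sketch of the kind of call the new docs example covers, assuming the usual OpenAI-style image message format; the model name and image URL below are placeholders rather than values taken from the docs:

```python
import litellm

# Hypothetical values: any Groq model marked "supports_vision": true should work here.
response = litellm.completion(
    model="groq/llama-3.2-11b-vision-preview",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/dog.png"},
                },
            ],
        }
    ],
)
print(response.choices[0].message.content)
```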

* fix(prometheus.py): refactor self.litellm_proxy_failed_requests_metric to use label factory

* feat(prometheus.py): new 'litellm_proxy_failed_requests_by_tag_metric'

allows tracking failed requests by tag on the proxy
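
For context, a sketch of how a client could attach tags to a proxy request so the by-tag counters have something to slice on; the base URL, API key, and tag names are illustrative, and passing tags via request metadata is an assumption about the proxy's tagging mechanism, not something defined in this PR:

```python
import openai

# Hypothetical client pointed at a locally running LiteLLM proxy.
client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")

client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    # tags passed in request metadata surface as the "tag" label on the *_by_tag metrics
    extra_body={"metadata": {"tags": ["prod", "checkout-flow"]}},
)
```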

* fix(prometheus.py): fix exception logging

* feat(prometheus.py): add new 'litellm_request_total_latency_by_tag_metric'

enables tracking latency by use-case

* feat(prometheus.py): add new llm api latency by tag metric

* feat(prometheus.py): new litellm_deployment_latency_per_output_token_by_tag metric

allows tracking deployment latency by tag

* fix(prometheus.py): refactor 'litellm_requests_metric' to use enum values + label factory

* feat(prometheus.py): new litellm_proxy_total_requests_by_tag metric

allows tracking total requests by tag
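
Each of the *_by_tag metrics in this change follows the same emission pattern: iterate over the request's tags and record one labelled sample per tag. A slightly simplified excerpt from the prometheus.py diff below:

```python
# One sample per tag on the request (excerpted from the diff, slightly simplified).
for tag in enum_values.tags:
    _labels = prometheus_label_factory(
        supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
        enum_values=enum_values,
        tag=tag,
    )
    self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
```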

* feat(prometheus.py): new metric litellm_deployment_successful_fallbacks_by_tag

allows tracking deployment fallbacks by tag

* fix(prometheus.py): new 'litellm_deployment_failed_fallbacks_by_tag' metric

allows tracking failed deployment fallbacks by custom tag

* test: fix test

* test: rename test to run earlier

* test: skip flaky test
Krish Dholakia, 2024-12-26 21:21:16 -08:00 (committed by GitHub)
parent c72f1aeeda
commit f30260343b
7 changed files with 389 additions and 78 deletions


@@ -3,7 +3,7 @@
# On success, log events to Prometheus
import sys
from datetime import datetime, timedelta
from typing import List, Optional
from typing import List, Optional, cast
from litellm._logging import print_verbose, verbose_logger
from litellm.integrations.custom_logger import CustomLogger
@@ -37,16 +37,12 @@ class PrometheusLogger(CustomLogger):
self.litellm_proxy_failed_requests_metric = Counter(
name="litellm_proxy_failed_requests_metric",
documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
labelnames=[
"end_user",
"hashed_api_key",
"api_key_alias",
REQUESTED_MODEL,
"team",
"team_alias",
"user",
]
+ EXCEPTION_LABELS,
labelnames=PrometheusMetricLabels.litellm_proxy_failed_requests_metric.value,
)
self.litellm_proxy_failed_requests_by_tag_metric = Counter(
name="litellm_proxy_failed_requests_by_tag_metric",
documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
labelnames=PrometheusMetricLabels.litellm_proxy_failed_requests_by_tag_metric.value,
)
self.litellm_proxy_total_requests_metric = Counter(
@@ -55,6 +51,12 @@ class PrometheusLogger(CustomLogger):
labelnames=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
)
self.litellm_proxy_total_requests_by_tag_metric = Counter(
name="litellm_proxy_total_requests_by_tag_metric",
documentation="Total number of requests made to the proxy server - track number of client side requests by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
)
# request latency metrics
self.litellm_request_total_latency_metric = Histogram(
"litellm_request_total_latency_metric",
@@ -63,12 +65,25 @@ class PrometheusLogger(CustomLogger):
buckets=LATENCY_BUCKETS,
)
self.litellm_request_total_latency_by_tag_metric = Histogram(
"litellm_request_total_latency_by_tag_metric",
"Total latency (seconds) for a request to LiteLLM by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_request_total_latency_by_tag_metric.value,
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_metric = Histogram(
"litellm_llm_api_latency_metric",
"Total latency (seconds) for a models LLM API call",
labelnames=PrometheusMetricLabels.litellm_llm_api_latency_metric.value,
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_latency_by_tag_metric = Histogram(
"litellm_llm_api_latency_by_tag_metric",
"Total latency (seconds) for a models LLM API call by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_llm_api_latency_by_tag_metric.value,
buckets=LATENCY_BUCKETS,
)
self.litellm_llm_api_time_to_first_token_metric = Histogram(
"litellm_llm_api_time_to_first_token_metric",
@@ -301,22 +316,36 @@ class PrometheusLogger(CustomLogger):
self.litellm_deployment_latency_per_output_token = Histogram(
name="litellm_deployment_latency_per_output_token",
documentation="LLM Deployment Analytics - Latency per output token",
labelnames=_logged_llm_labels + team_and_key_labels,
labelnames=PrometheusMetricLabels.litellm_deployment_latency_per_output_token.value,
)
self.litellm_deployment_latency_per_output_token_by_tag = Histogram(
name="litellm_deployment_latency_per_output_token_by_tag",
documentation="LLM Deployment Analytics - Latency per output token by custom metadata tags",
labelnames=PrometheusMetricLabels.litellm_deployment_latency_per_output_token_by_tag.value,
)
self.litellm_deployment_successful_fallbacks = Counter(
"litellm_deployment_successful_fallbacks",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
[REQUESTED_MODEL, "fallback_model"]
+ team_and_key_labels
+ EXCEPTION_LABELS,
PrometheusMetricLabels.litellm_deployment_successful_fallbacks.value,
)
self.litellm_deployment_successful_fallbacks_by_tag = Counter(
"litellm_deployment_successful_fallbacks_by_tag",
"LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model by custom metadata tags",
PrometheusMetricLabels.litellm_deployment_successful_fallbacks_by_tag.value,
)
self.litellm_deployment_failed_fallbacks = Counter(
"litellm_deployment_failed_fallbacks",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
[REQUESTED_MODEL, "fallback_model"]
+ team_and_key_labels
+ EXCEPTION_LABELS,
PrometheusMetricLabels.litellm_deployment_failed_fallbacks.value,
)
self.litellm_deployment_failed_fallbacks_by_tag = Counter(
"litellm_deployment_failed_fallbacks_by_tag",
"LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model by custom metadata tags",
PrometheusMetricLabels.litellm_deployment_failed_fallbacks_by_tag.value,
)
self.litellm_llm_api_failed_requests_metric = Counter(
@@ -336,15 +365,7 @@ class PrometheusLogger(CustomLogger):
self.litellm_requests_metric = Counter(
name="litellm_requests_metric",
documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
labelnames=[
"end_user",
"hashed_api_key",
"api_key_alias",
"model",
"team",
"team_alias",
"user",
],
labelnames=PrometheusMetricLabels.litellm_requests_metric.value,
)
except Exception as e:
@@ -422,6 +443,7 @@ class PrometheusLogger(CustomLogger):
user_api_team_alias=user_api_team_alias,
user_id=user_id,
response_cost=response_cost,
enum_values=enum_values,
)
# input, output, total token metrics
@@ -472,7 +494,7 @@ class PrometheusLogger(CustomLogger):
# set x-ratelimit headers
self.set_llm_deployment_success_metrics(
kwargs, start_time, end_time, output_tokens
kwargs, start_time, end_time, enum_values, output_tokens
)
if (
@@ -484,6 +506,14 @@ class PrometheusLogger(CustomLogger):
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
def _increment_token_metrics(
self,
standard_logging_payload: StandardLoggingPayload,
@@ -594,16 +624,14 @@ class PrometheusLogger(CustomLogger):
user_api_team_alias: Optional[str],
user_id: Optional[str],
response_cost: float,
enum_values: UserAPIKeyLabelValues,
):
self.litellm_requests_metric.labels(
end_user_id,
user_api_key,
user_api_key_alias,
model,
user_api_team,
user_api_team_alias,
user_id,
).inc()
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_requests_metric.value,
enum_values=enum_values,
)
self.litellm_requests_metric.labels(**_labels).inc()
self.litellm_spend_metric.labels(
end_user_id,
user_api_key,
@@ -716,6 +744,15 @@ class PrometheusLogger(CustomLogger):
self.litellm_llm_api_latency_metric.labels(**_labels).observe(
api_call_total_time_seconds
)
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_llm_api_latency_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_llm_api_latency_by_tag_metric.labels(**_labels).observe(
api_call_total_time_seconds
)
# total request latency
if start_time is not None and isinstance(start_time, datetime):
@@ -729,6 +766,16 @@ class PrometheusLogger(CustomLogger):
total_time_seconds
)
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_request_total_latency_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_request_total_latency_by_tag_metric.labels(
**_labels
).observe(total_time_seconds)
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
from litellm.types.utils import StandardLoggingPayload
@@ -793,6 +840,7 @@ class PrometheusLogger(CustomLogger):
] + EXCEPTION_LABELS,
"""
try:
_tags = cast(List[str], request_data.get("tags") or [])
enum_values = UserAPIKeyLabelValues(
end_user=user_api_key_dict.end_user_id,
user=user_api_key_dict.user_id,
@@ -802,27 +850,37 @@ class PrometheusLogger(CustomLogger):
team_alias=user_api_key_dict.team_alias,
requested_model=request_data.get("model", ""),
status_code=str(getattr(original_exception, "status_code", None)),
exception_status=str(getattr(original_exception, "status_code", None)),
exception_class=str(original_exception.__class__.__name__),
tags=_tags,
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_failed_requests_metric.value,
enum_values=enum_values,
)
self.litellm_proxy_failed_requests_metric.labels(**_labels).inc()
self.litellm_proxy_failed_requests_metric.labels(
end_user=user_api_key_dict.end_user_id,
hashed_api_key=user_api_key_dict.api_key,
api_key_alias=user_api_key_dict.key_alias,
requested_model=request_data.get("model", ""),
team=user_api_key_dict.team_id,
team_alias=user_api_key_dict.team_alias,
user=user_api_key_dict.user_id,
exception_status=getattr(original_exception, "status_code", None),
exception_class=str(original_exception.__class__.__name__),
).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_failed_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_failed_requests_by_tag_metric.labels(**_labels).inc()
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_metric.value,
enum_values=enum_values,
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
pass
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
@@ -851,6 +909,14 @@ class PrometheusLogger(CustomLogger):
enum_values=enum_values,
)
self.litellm_proxy_total_requests_metric.labels(**_labels).inc()
for tag in enum_values.tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_proxy_total_requests_by_tag_metric.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_proxy_total_requests_by_tag_metric.labels(**_labels).inc()
except Exception as e:
verbose_logger.exception(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
@@ -962,6 +1028,7 @@ class PrometheusLogger(CustomLogger):
request_kwargs: dict,
start_time,
end_time,
enum_values: UserAPIKeyLabelValues,
output_tokens: float = 1.0,
):
try:
@@ -1092,21 +1159,12 @@ class PrometheusLogger(CustomLogger):
latency_per_token = None
if output_tokens is not None and output_tokens > 0:
latency_per_token = _latency_seconds / output_tokens
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_latency_per_output_token.value,
enum_values=enum_values,
)
self.litellm_deployment_latency_per_output_token.labels(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=llm_provider,
hashed_api_key=standard_logging_payload["metadata"][
"user_api_key_hash"
],
api_key_alias=standard_logging_payload["metadata"][
"user_api_key_alias"
],
team=standard_logging_payload["metadata"]["user_api_key_team_id"],
team_alias=standard_logging_payload["metadata"][
"user_api_key_team_alias"
],
**_labels
).observe(latency_per_token)
except Exception as e:
@@ -1142,7 +1200,8 @@ class PrometheusLogger(CustomLogger):
)
)
_new_model = kwargs.get("model")
self.litellm_deployment_successful_fallbacks.labels(
_tags = cast(List[str], kwargs.get("tags") or [])
enum_values = UserAPIKeyLabelValues(
requested_model=original_model_group,
fallback_model=_new_model,
hashed_api_key=standard_metadata["user_api_key_hash"],
@ -1151,7 +1210,21 @@ class PrometheusLogger(CustomLogger):
team_alias=standard_metadata["user_api_key_team_alias"],
exception_status=str(getattr(original_exception, "status_code", None)),
exception_class=str(original_exception.__class__.__name__),
).inc()
tags=_tags,
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_successful_fallbacks.value,
enum_values=enum_values,
)
self.litellm_deployment_successful_fallbacks.labels(**_labels).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_successful_fallbacks_by_tag.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_deployment_successful_fallbacks_by_tag.labels(**_labels).inc()
async def log_failure_fallback_event(
self, original_model_group: str, kwargs: dict, original_exception: Exception
@@ -1171,12 +1244,14 @@ class PrometheusLogger(CustomLogger):
)
_new_model = kwargs.get("model")
_metadata = kwargs.get("metadata", {})
_tags = cast(List[str], kwargs.get("tags") or [])
standard_metadata: StandardLoggingMetadata = (
StandardLoggingPayloadSetup.get_standard_logging_metadata(
metadata=_metadata
)
)
self.litellm_deployment_failed_fallbacks.labels(
enum_values = UserAPIKeyLabelValues(
requested_model=original_model_group,
fallback_model=_new_model,
hashed_api_key=standard_metadata["user_api_key_hash"],
@ -1185,7 +1260,22 @@ class PrometheusLogger(CustomLogger):
team_alias=standard_metadata["user_api_key_team_alias"],
exception_status=str(getattr(original_exception, "status_code", None)),
exception_class=str(original_exception.__class__.__name__),
).inc()
tags=_tags,
)
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_failed_fallbacks.value,
enum_values=enum_values,
)
self.litellm_deployment_failed_fallbacks.labels(**_labels).inc()
for tag in _tags:
_labels = prometheus_label_factory(
supported_enum_labels=PrometheusMetricLabels.litellm_deployment_failed_fallbacks_by_tag.value,
enum_values=enum_values,
tag=tag,
)
self.litellm_deployment_failed_fallbacks_by_tag.labels(**_labels).inc()
def set_litellm_deployment_state(
self,
@@ -1273,7 +1363,9 @@ class PrometheusLogger(CustomLogger):
def prometheus_label_factory(
supported_enum_labels: List[str], enum_values: UserAPIKeyLabelValues
supported_enum_labels: List[str],
enum_values: UserAPIKeyLabelValues,
tag: Optional[str] = None,
) -> dict:
"""
Returns a dictionary of label + values for prometheus.
@@ -1290,6 +1382,9 @@ def prometheus_label_factory(
if label in supported_enum_labels
}
if tag and "tag" in supported_enum_labels:
filtered_labels["tag"] = tag
if UserAPIKeyLabelNames.END_USER.value in filtered_labels:
filtered_labels["end_user"] = get_end_user_id_for_cost_tracking(
litellm_params={"user_api_key_end_user_id": enum_values.end_user},