Mirror of https://github.com/BerriAI/litellm.git
(Bug fix) prometheus - safely set latency metrics (#8669)
* use safe_duration_seconds
* _safe_duration_seconds
* test_set_latency_metrics_missing_timestamps
parent 5dc62c9e7b
commit bcba49ab1f
2 changed files with 125 additions and 13 deletions
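In short: _set_latency_metrics previously computed each latency by subtracting timestamps pulled out of kwargs, with only partial isinstance guards, so a missing or non-datetime timestamp could raise at metric-emission time. The fix routes every duration through a new _safe_duration_seconds helper that returns None unless both endpoints are datetime objects, and each metric is now emitted only when its duration is not None.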
File 1 of 2 (PrometheusLogger implementation):

@@ -691,14 +691,14 @@ class PrometheusLogger(CustomLogger):
         start_time: Optional[datetime] = kwargs.get("start_time")
         api_call_start_time = kwargs.get("api_call_start_time", None)
         completion_start_time = kwargs.get("completion_start_time", None)
+        time_to_first_token_seconds = self._safe_duration_seconds(
+            start_time=api_call_start_time,
+            end_time=completion_start_time,
+        )
         if (
-            completion_start_time is not None
-            and isinstance(completion_start_time, datetime)
+            time_to_first_token_seconds is not None
             and kwargs.get("stream", False) is True  # only emit for streaming requests
         ):
-            time_to_first_token_seconds = (
-                completion_start_time - api_call_start_time
-            ).total_seconds()
             self.litellm_llm_api_time_to_first_token_metric.labels(
                 model,
                 user_api_key,
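The hunk above fixes the sharpest edge of the old code: the time-to-first-token branch validated completion_start_time but never api_call_start_time, so the subtraction could still fail when a provider never reported when the API call started. A minimal repro of that failure mode (standalone sketch, not litellm code):

from datetime import datetime

completion_start_time = datetime.now()
api_call_start_time = None  # e.g. the provider never reported it

try:
    # the unguarded subtraction the old code could reach
    (completion_start_time - api_call_start_time).total_seconds()
except TypeError as exc:
    print(f"unguarded subtraction raises: {exc}")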
@@ -710,11 +710,12 @@ class PrometheusLogger(CustomLogger):
             verbose_logger.debug(
                 "Time to first token metric not emitted, stream option in model_parameters is not True"
             )
-        if api_call_start_time is not None and isinstance(
-            api_call_start_time, datetime
-        ):
-            api_call_total_time: timedelta = end_time - api_call_start_time
-            api_call_total_time_seconds = api_call_total_time.total_seconds()
+        api_call_total_time_seconds = self._safe_duration_seconds(
+            start_time=api_call_start_time,
+            end_time=end_time,
+        )
+
+        if api_call_total_time_seconds is not None:
             _labels = prometheus_label_factory(
                 supported_enum_labels=PrometheusMetricLabels.get_labels(
                     label_name="litellm_llm_api_latency_metric"
@@ -726,9 +727,11 @@ class PrometheusLogger(CustomLogger):
             )
 
         # total request latency
-        if start_time is not None and isinstance(start_time, datetime):
-            total_time: timedelta = end_time - start_time
-            total_time_seconds = total_time.total_seconds()
+        total_time_seconds = self._safe_duration_seconds(
+            start_time=start_time,
+            end_time=end_time,
+        )
+        if total_time_seconds is not None:
             _labels = prometheus_label_factory(
                 supported_enum_labels=PrometheusMetricLabels.get_labels(
                     label_name="litellm_request_total_latency_metric"
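The two hunks above apply the same compute-then-check pattern to the API-call latency and total request latency histograms: compute the duration first, then gate the whole label/observe block on it being non-None. A hedged sketch of that pattern using prometheus_client directly; the metric name and label set here are illustrative, not litellm's actual definitions:

from prometheus_client import Histogram

# Illustrative metric, not litellm's real definition.
latency_histogram = Histogram(
    "llm_api_latency_seconds",
    "End-to-end LLM API call latency in seconds",
    ["model", "hashed_api_key"],
)

duration = 1.5  # in litellm this would come from self._safe_duration_seconds(...)
if duration is not None:
    # .labels(...) resolves the child time series; .observe(...) records one sample
    latency_histogram.labels("gpt-3.5-turbo", "key1").observe(duration)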
@@ -1689,6 +1692,21 @@ class PrometheusLogger(CustomLogger):
             budget_reset_at - datetime.now(budget_reset_at.tzinfo)
         ).total_seconds() / 3600
 
+    def _safe_duration_seconds(
+        self,
+        start_time: Any,
+        end_time: Any,
+    ) -> Optional[float]:
+        """
+        Compute the duration in seconds between two objects.
+
+        Returns the duration as a float if both start and end are instances of datetime,
+        otherwise returns None.
+        """
+        if isinstance(start_time, datetime) and isinstance(end_time, datetime):
+            return (end_time - start_time).total_seconds()
+        return None
+
 
 def prometheus_label_factory(
     supported_enum_labels: List[str],
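A quick behavioral check of the helper's contract, written as a standalone function outside the class for brevity (same body as the method added above):

from datetime import datetime, timedelta
from typing import Any, Optional

def safe_duration_seconds(start_time: Any, end_time: Any) -> Optional[float]:
    # Mirrors PrometheusLogger._safe_duration_seconds
    if isinstance(start_time, datetime) and isinstance(end_time, datetime):
        return (end_time - start_time).total_seconds()
    return None

now = datetime.now()
print(safe_duration_seconds(now - timedelta(seconds=1.5), now))  # 1.5
print(safe_duration_seconds(None, now))                          # None
print(safe_duration_seconds("2024-01-01T00:00:00", now))         # None: strings are rejected, not parsed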
File 2 of 2 (unit tests):

@@ -436,6 +436,100 @@ def test_set_latency_metrics(prometheus_logger):
     )
 
 
+def test_set_latency_metrics_missing_timestamps(prometheus_logger):
+    """
+    Test that _set_latency_metrics handles missing timestamp values gracefully
+    """
+    # Mock all metrics used in the method
+    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
+    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
+    prometheus_logger.litellm_request_total_latency_metric = MagicMock()
+
+    standard_logging_payload = create_standard_logging_payload()
+    enum_values = UserAPIKeyLabelValues(
+        litellm_model_name=standard_logging_payload["model"],
+        api_provider=standard_logging_payload["custom_llm_provider"],
+        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
+        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
+        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
+    )
+
+    # Test case where completion_start_time is None
+    kwargs = {
+        "end_time": datetime.now(),
+        "start_time": datetime.now() - timedelta(seconds=2),
+        "api_call_start_time": datetime.now() - timedelta(seconds=1.5),
+        "completion_start_time": None,  # Missing completion start time
+        "stream": True,
+    }
+
+    # This should not raise an exception
+    prometheus_logger._set_latency_metrics(
+        kwargs=kwargs,
+        model="gpt-3.5-turbo",
+        user_api_key="key1",
+        user_api_key_alias="alias1",
+        user_api_team="team1",
+        user_api_team_alias="team_alias1",
+        enum_values=enum_values,
+    )
+
+    # Verify time to first token metric was not called due to missing completion_start_time
+    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called()
+
+    # Other metrics should still be called
+    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once()
+    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once()
+
+
+def test_set_latency_metrics_missing_api_call_start(prometheus_logger):
+    """
+    Test that _set_latency_metrics handles missing api_call_start_time gracefully
+    """
+    # Mock all metrics used in the method
+    prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
+    prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
+    prometheus_logger.litellm_request_total_latency_metric = MagicMock()
+
+    standard_logging_payload = create_standard_logging_payload()
+    enum_values = UserAPIKeyLabelValues(
+        litellm_model_name=standard_logging_payload["model"],
+        api_provider=standard_logging_payload["custom_llm_provider"],
+        hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"],
+        api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"],
+        team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+        team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"],
+    )
+
+    # Test case where api_call_start_time is None
+    kwargs = {
+        "end_time": datetime.now(),
+        "start_time": datetime.now() - timedelta(seconds=2),
+        "api_call_start_time": None,  # Missing API call start time
+        "completion_start_time": datetime.now() - timedelta(seconds=1),
+        "stream": True,
+    }
+
+    # This should not raise an exception
+    prometheus_logger._set_latency_metrics(
+        kwargs=kwargs,
+        model="gpt-3.5-turbo",
+        user_api_key="key1",
+        user_api_key_alias="alias1",
+        user_api_team="team1",
+        user_api_team_alias="team_alias1",
+        enum_values=enum_values,
+    )
+
+    # Verify API latency metrics were not called due to missing api_call_start_time
+    prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called()
+    prometheus_logger.litellm_llm_api_latency_metric.labels.assert_not_called()
+
+    # Total request latency should still be called
+    prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once()
+
+
 def test_increment_top_level_request_and_spend_metrics(prometheus_logger):
     """
     Test the increment_top_level_request_and_spend_metrics method
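Both tests rely on a standard unittest.mock pattern rather than scraping Prometheus output: each metric object on the logger is replaced with a MagicMock, and the assertions check whether .labels() was ever resolved. A minimal illustration of those assertions (hypothetical metric objects, not the fixtures above):

from unittest.mock import MagicMock

emitted_metric = MagicMock()
# A code path that runs resolves labels and records an observation:
emitted_metric.labels("gpt-3.5-turbo", "key1").observe(1.5)
emitted_metric.labels.assert_called_once()

skipped_metric = MagicMock()
# A path skipped because a duration was None never touches .labels():
skipped_metric.labels.assert_not_called()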