From bcba49ab1f8530d5a4e802f64b107ae6b51fd450 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 19 Feb 2025 20:08:46 -0800 Subject: [PATCH] (Bug fix) prometheus - safely set latency metrics (#8669) * use safe_duration_seconds * _safe_duration_seconds * test_set_latency_metrics_missing_timestamps --- litellm/integrations/prometheus.py | 44 ++++++--- .../test_prometheus_unit_tests.py | 94 +++++++++++++++++++ 2 files changed, 125 insertions(+), 13 deletions(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 9d56fc7125..04050abf7b 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -691,14 +691,14 @@ class PrometheusLogger(CustomLogger): start_time: Optional[datetime] = kwargs.get("start_time") api_call_start_time = kwargs.get("api_call_start_time", None) completion_start_time = kwargs.get("completion_start_time", None) + time_to_first_token_seconds = self._safe_duration_seconds( + start_time=api_call_start_time, + end_time=completion_start_time, + ) if ( - completion_start_time is not None - and isinstance(completion_start_time, datetime) + time_to_first_token_seconds is not None and kwargs.get("stream", False) is True # only emit for streaming requests ): - time_to_first_token_seconds = ( - completion_start_time - api_call_start_time - ).total_seconds() self.litellm_llm_api_time_to_first_token_metric.labels( model, user_api_key, @@ -710,11 +710,12 @@ class PrometheusLogger(CustomLogger): verbose_logger.debug( "Time to first token metric not emitted, stream option in model_parameters is not True" ) - if api_call_start_time is not None and isinstance( - api_call_start_time, datetime - ): - api_call_total_time: timedelta = end_time - api_call_start_time - api_call_total_time_seconds = api_call_total_time.total_seconds() + + api_call_total_time_seconds = self._safe_duration_seconds( + start_time=api_call_start_time, + end_time=end_time, + ) + if api_call_total_time_seconds is not None: _labels = prometheus_label_factory( supported_enum_labels=PrometheusMetricLabels.get_labels( label_name="litellm_llm_api_latency_metric" @@ -726,9 +727,11 @@ class PrometheusLogger(CustomLogger): ) # total request latency - if start_time is not None and isinstance(start_time, datetime): - total_time: timedelta = end_time - start_time - total_time_seconds = total_time.total_seconds() + total_time_seconds = self._safe_duration_seconds( + start_time=start_time, + end_time=end_time, + ) + if total_time_seconds is not None: _labels = prometheus_label_factory( supported_enum_labels=PrometheusMetricLabels.get_labels( label_name="litellm_request_total_latency_metric" @@ -1689,6 +1692,21 @@ class PrometheusLogger(CustomLogger): budget_reset_at - datetime.now(budget_reset_at.tzinfo) ).total_seconds() / 3600 + def _safe_duration_seconds( + self, + start_time: Any, + end_time: Any, + ) -> Optional[float]: + """ + Compute the duration in seconds between two objects. + + Returns the duration as a float if both start and end are instances of datetime, + otherwise returns None. 
+ """ + if isinstance(start_time, datetime) and isinstance(end_time, datetime): + return (end_time - start_time).total_seconds() + return None + def prometheus_label_factory( supported_enum_labels: List[str], diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py index 4c328bcc82..641d121fa9 100644 --- a/tests/logging_callback_tests/test_prometheus_unit_tests.py +++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py @@ -436,6 +436,100 @@ def test_set_latency_metrics(prometheus_logger): ) +def test_set_latency_metrics_missing_timestamps(prometheus_logger): + """ + Test that _set_latency_metrics handles missing timestamp values gracefully + """ + # Mock all metrics used in the method + prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock() + prometheus_logger.litellm_llm_api_latency_metric = MagicMock() + prometheus_logger.litellm_request_total_latency_metric = MagicMock() + + standard_logging_payload = create_standard_logging_payload() + enum_values = UserAPIKeyLabelValues( + litellm_model_name=standard_logging_payload["model"], + api_provider=standard_logging_payload["custom_llm_provider"], + hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"], + api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"], + team=standard_logging_payload["metadata"]["user_api_key_team_id"], + team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"], + ) + + # Test case where completion_start_time is None + kwargs = { + "end_time": datetime.now(), + "start_time": datetime.now() - timedelta(seconds=2), + "api_call_start_time": datetime.now() - timedelta(seconds=1.5), + "completion_start_time": None, # Missing completion start time + "stream": True, + } + + # This should not raise an exception + prometheus_logger._set_latency_metrics( + kwargs=kwargs, + model="gpt-3.5-turbo", + user_api_key="key1", + user_api_key_alias="alias1", + user_api_team="team1", + user_api_team_alias="team_alias1", + enum_values=enum_values, + ) + + # Verify time to first token metric was not called due to missing completion_start_time + prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called() + + # Other metrics should still be called + prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once() + prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once() + + +def test_set_latency_metrics_missing_api_call_start(prometheus_logger): + """ + Test that _set_latency_metrics handles missing api_call_start_time gracefully + """ + # Mock all metrics used in the method + prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock() + prometheus_logger.litellm_llm_api_latency_metric = MagicMock() + prometheus_logger.litellm_request_total_latency_metric = MagicMock() + + standard_logging_payload = create_standard_logging_payload() + enum_values = UserAPIKeyLabelValues( + litellm_model_name=standard_logging_payload["model"], + api_provider=standard_logging_payload["custom_llm_provider"], + hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"], + api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"], + team=standard_logging_payload["metadata"]["user_api_key_team_id"], + team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"], + ) + + # Test case where api_call_start_time is None + kwargs = { + "end_time": datetime.now(), + "start_time": 
datetime.now() - timedelta(seconds=2), + "api_call_start_time": None, # Missing API call start time + "completion_start_time": datetime.now() - timedelta(seconds=1), + "stream": True, + } + + # This should not raise an exception + prometheus_logger._set_latency_metrics( + kwargs=kwargs, + model="gpt-3.5-turbo", + user_api_key="key1", + user_api_key_alias="alias1", + user_api_team="team1", + user_api_team_alias="team_alias1", + enum_values=enum_values, + ) + + # Verify API latency metrics were not called due to missing api_call_start_time + prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called() + prometheus_logger.litellm_llm_api_latency_metric.labels.assert_not_called() + + # Total request latency should still be called + prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once() + + def test_increment_top_level_request_and_spend_metrics(prometheus_logger): """ Test the increment_top_level_request_and_spend_metrics method
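
Note (follow-up, not part of the patch): the sketch below restates the guard that _safe_duration_seconds introduces, using only the Python standard library. The function name safe_duration_seconds is illustrative; it mirrors PrometheusLogger._safe_duration_seconds from the diff above. A duration is computed only when both endpoints are datetime instances, so a missing or malformed timestamp yields None instead of raising a TypeError on the subtraction.

from datetime import datetime, timedelta
from typing import Any, Optional


def safe_duration_seconds(start_time: Any, end_time: Any) -> Optional[float]:
    # Illustrative re-statement of the guard in the patch: only compute a
    # duration when both endpoints are real datetimes; anything else
    # (None, float, str) falls through to None.
    if isinstance(start_time, datetime) and isinstance(end_time, datetime):
        return (end_time - start_time).total_seconds()
    return None


now = datetime.now()
print(safe_duration_seconds(now - timedelta(seconds=2), now))  # 2.0
print(safe_duration_seconds(None, now))  # None, so the caller skips the metric

Returning None rather than defaulting to 0.0 keeps "no data" distinct from "zero latency", which is why the rewritten _set_latency_metrics checks each computed value against None before observing the corresponding latency metric.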