From bcba49ab1f8530d5a4e802f64b107ae6b51fd450 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 19 Feb 2025 20:08:46 -0800 Subject: [PATCH] (Bug fix) prometheus - safely set latency metrics (#8669) * use safe_duration_seconds * _safe_duration_seconds * test_set_latency_metrics_missing_timestamps --- litellm/integrations/prometheus.py | 44 ++++++--- .../test_prometheus_unit_tests.py | 94 +++++++++++++++++++ 2 files changed, 125 insertions(+), 13 deletions(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 9d56fc7125..04050abf7b 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -691,14 +691,14 @@ class PrometheusLogger(CustomLogger): start_time: Optional[datetime] = kwargs.get("start_time") api_call_start_time = kwargs.get("api_call_start_time", None) completion_start_time = kwargs.get("completion_start_time", None) + time_to_first_token_seconds = self._safe_duration_seconds( + start_time=api_call_start_time, + end_time=completion_start_time, + ) if ( - completion_start_time is not None - and isinstance(completion_start_time, datetime) + time_to_first_token_seconds is not None and kwargs.get("stream", False) is True # only emit for streaming requests ): - time_to_first_token_seconds = ( - completion_start_time - api_call_start_time - ).total_seconds() self.litellm_llm_api_time_to_first_token_metric.labels( model, user_api_key, @@ -710,11 +710,12 @@ class PrometheusLogger(CustomLogger): verbose_logger.debug( "Time to first token metric not emitted, stream option in model_parameters is not True" ) - if api_call_start_time is not None and isinstance( - api_call_start_time, datetime - ): - api_call_total_time: timedelta = end_time - api_call_start_time - api_call_total_time_seconds = api_call_total_time.total_seconds() + + api_call_total_time_seconds = self._safe_duration_seconds( + start_time=api_call_start_time, + end_time=end_time, + ) + if api_call_total_time_seconds is not None: _labels = prometheus_label_factory( supported_enum_labels=PrometheusMetricLabels.get_labels( label_name="litellm_llm_api_latency_metric" @@ -726,9 +727,11 @@ class PrometheusLogger(CustomLogger): ) # total request latency - if start_time is not None and isinstance(start_time, datetime): - total_time: timedelta = end_time - start_time - total_time_seconds = total_time.total_seconds() + total_time_seconds = self._safe_duration_seconds( + start_time=start_time, + end_time=end_time, + ) + if total_time_seconds is not None: _labels = prometheus_label_factory( supported_enum_labels=PrometheusMetricLabels.get_labels( label_name="litellm_request_total_latency_metric" @@ -1689,6 +1692,21 @@ class PrometheusLogger(CustomLogger): budget_reset_at - datetime.now(budget_reset_at.tzinfo) ).total_seconds() / 3600 + def _safe_duration_seconds( + self, + start_time: Any, + end_time: Any, + ) -> Optional[float]: + """ + Compute the duration in seconds between two objects. + + Returns the duration as a float if both start and end are instances of datetime, + otherwise returns None. 
+ """ + if isinstance(start_time, datetime) and isinstance(end_time, datetime): + return (end_time - start_time).total_seconds() + return None + def prometheus_label_factory( supported_enum_labels: List[str], diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py index 4c328bcc82..641d121fa9 100644 --- a/tests/logging_callback_tests/test_prometheus_unit_tests.py +++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py @@ -436,6 +436,100 @@ def test_set_latency_metrics(prometheus_logger): ) +def test_set_latency_metrics_missing_timestamps(prometheus_logger): + """ + Test that _set_latency_metrics handles missing timestamp values gracefully + """ + # Mock all metrics used in the method + prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock() + prometheus_logger.litellm_llm_api_latency_metric = MagicMock() + prometheus_logger.litellm_request_total_latency_metric = MagicMock() + + standard_logging_payload = create_standard_logging_payload() + enum_values = UserAPIKeyLabelValues( + litellm_model_name=standard_logging_payload["model"], + api_provider=standard_logging_payload["custom_llm_provider"], + hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"], + api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"], + team=standard_logging_payload["metadata"]["user_api_key_team_id"], + team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"], + ) + + # Test case where completion_start_time is None + kwargs = { + "end_time": datetime.now(), + "start_time": datetime.now() - timedelta(seconds=2), + "api_call_start_time": datetime.now() - timedelta(seconds=1.5), + "completion_start_time": None, # Missing completion start time + "stream": True, + } + + # This should not raise an exception + prometheus_logger._set_latency_metrics( + kwargs=kwargs, + model="gpt-3.5-turbo", + user_api_key="key1", + user_api_key_alias="alias1", + user_api_team="team1", + user_api_team_alias="team_alias1", + enum_values=enum_values, + ) + + # Verify time to first token metric was not called due to missing completion_start_time + prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called() + + # Other metrics should still be called + prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once() + prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once() + + +def test_set_latency_metrics_missing_api_call_start(prometheus_logger): + """ + Test that _set_latency_metrics handles missing api_call_start_time gracefully + """ + # Mock all metrics used in the method + prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock() + prometheus_logger.litellm_llm_api_latency_metric = MagicMock() + prometheus_logger.litellm_request_total_latency_metric = MagicMock() + + standard_logging_payload = create_standard_logging_payload() + enum_values = UserAPIKeyLabelValues( + litellm_model_name=standard_logging_payload["model"], + api_provider=standard_logging_payload["custom_llm_provider"], + hashed_api_key=standard_logging_payload["metadata"]["user_api_key_hash"], + api_key_alias=standard_logging_payload["metadata"]["user_api_key_alias"], + team=standard_logging_payload["metadata"]["user_api_key_team_id"], + team_alias=standard_logging_payload["metadata"]["user_api_key_team_alias"], + ) + + # Test case where api_call_start_time is None + kwargs = { + "end_time": datetime.now(), + "start_time": 
datetime.now() - timedelta(seconds=2), + "api_call_start_time": None, # Missing API call start time + "completion_start_time": datetime.now() - timedelta(seconds=1), + "stream": True, + } + + # This should not raise an exception + prometheus_logger._set_latency_metrics( + kwargs=kwargs, + model="gpt-3.5-turbo", + user_api_key="key1", + user_api_key_alias="alias1", + user_api_team="team1", + user_api_team_alias="team_alias1", + enum_values=enum_values, + ) + + # Verify API latency metrics were not called due to missing api_call_start_time + prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_not_called() + prometheus_logger.litellm_llm_api_latency_metric.labels.assert_not_called() + + # Total request latency should still be called + prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once() + + def test_increment_top_level_request_and_spend_metrics(prometheus_logger): """ Test the increment_top_level_request_and_spend_metrics method
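
Note (follow-up, not part of the patch): the sketch below restates the guard that _safe_duration_seconds introduces, using only the Python standard library. The function name safe_duration_seconds is illustrative; it mirrors PrometheusLogger._safe_duration_seconds from the diff above. A duration is computed only when both endpoints are datetime instances, so a missing or malformed timestamp yields None instead of raising a TypeError on the subtraction.

from datetime import datetime, timedelta
from typing import Any, Optional


def safe_duration_seconds(start_time: Any, end_time: Any) -> Optional[float]:
    # Illustrative re-statement of the guard in the patch: only compute a
    # duration when both endpoints are real datetimes; anything else
    # (None, float, str) falls through to None.
    if isinstance(start_time, datetime) and isinstance(end_time, datetime):
        return (end_time - start_time).total_seconds()
    return None


now = datetime.now()
print(safe_duration_seconds(now - timedelta(seconds=2), now))  # 2.0
print(safe_duration_seconds(None, now))  # None, so the caller skips the metric

Returning None rather than defaulting to 0.0 keeps "no data" distinct from "zero latency", which is why the rewritten _set_latency_metrics checks each computed value against None before observing the corresponding latency metric.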