From 9daa6fb0b4a882f1dd08b11a377cc8fb99b6c310 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 13 Jan 2025 20:16:34 -0800
Subject: [PATCH] (prometheus - minor bug fix) - `litellm_llm_api_time_to_first_token_metric` not populating for bedrock models (#7740)

* fix prometheus ttft
* fix test_set_latency_metrics
* fix _set_latency_metrics
* fix _set_latency_metrics
* fix test_set_latency_metrics
* test_async_log_success_event
* huggingface/mistralai/Mistral-7B-Instruct-v0.3
---
 litellm/integrations/prometheus.py                        | 8 +-------
 .../logging_callback_tests/test_prometheus_unit_tests.py  | 4 ++--
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 3409f3a799..9ca3c48cc4 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
             # why type ignore below?
             # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
             # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-            standard_logging_payload=standard_logging_payload,  # type: ignore
             enum_values=enum_values,
         )

@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
         user_api_key_alias: Optional[str],
         user_api_team: Optional[str],
         user_api_team_alias: Optional[str],
-        standard_logging_payload: StandardLoggingPayload,
         enum_values: UserAPIKeyLabelValues,
     ):
         # latency metrics
-        model_parameters: dict = standard_logging_payload["model_parameters"]
         end_time: datetime = kwargs.get("end_time") or datetime.now()
         start_time: Optional[datetime] = kwargs.get("start_time")
         api_call_start_time = kwargs.get("api_call_start_time", None)
-
         completion_start_time = kwargs.get("completion_start_time", None)
-
         if (
             completion_start_time is not None
             and isinstance(completion_start_time, datetime)
-            and model_parameters.get("stream")
-            is True  # only emit for streaming requests
+            and kwargs.get("stream", False) is True  # only emit for streaming requests
         ):
             time_to_first_token_seconds = (
                 completion_start_time - api_call_start_time
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index cd333c7bfc..94b3164a26 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
     standard_logging_object = create_standard_logging_payload()
     kwargs = {
         "model": "gpt-3.5-turbo",
+        "stream": True,
         "litellm_params": {
             "metadata": {
                 "user_api_key": "test_key",
@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
     time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
     """
     standard_logging_payload = create_standard_logging_payload()
-    standard_logging_payload["model_parameters"] = {"stream": True}
     prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
     prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
     prometheus_logger.litellm_request_total_latency_metric = MagicMock()
@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
         "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
         "completion_start_time": now
         - timedelta(seconds=1),  # when the completion starts
+        "stream": True,
     }

     prometheus_logger._set_latency_metrics(
@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
         user_api_key_alias="alias1",
         user_api_team="team1",
         user_api_team_alias="team_alias1",
-        standard_logging_payload=standard_logging_payload,
         enum_values=enum_values,
     )
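
For reviewers who want the behavior change in isolation: below is a minimal, runnable sketch of the post-fix time-to-first-token gating. `set_latency_metrics_sketch` and `_FakeTTFTMetric` are hypothetical stand-ins invented for this example and are not litellm APIs; the real `PrometheusLogger._set_latency_metrics` also records LLM API latency and total request latency, which this sketch omits.

```python
# Hedged sketch only: `set_latency_metrics_sketch` and `_FakeTTFTMetric`
# are illustrative stand-ins, not litellm code.
from datetime import datetime, timedelta
from typing import Optional


class _FakeTTFTMetric:
    """Stand-in for the Prometheus histogram the real logger uses."""

    def labels(self, **label_values) -> "_FakeTTFTMetric":
        self._labels = label_values
        return self

    def observe(self, value: float) -> None:
        print(f"ttft observed for {self._labels}: {value:.3f}s")


def set_latency_metrics_sketch(kwargs: dict, ttft_metric: _FakeTTFTMetric) -> None:
    api_call_start_time: Optional[datetime] = kwargs.get("api_call_start_time")
    completion_start_time = kwargs.get("completion_start_time")
    if (
        completion_start_time is not None
        and isinstance(completion_start_time, datetime)
        and api_call_start_time is not None
        # The fix: read "stream" from the top-level kwargs. The old check
        # read standard_logging_payload["model_parameters"]["stream"],
        # which some providers (e.g. Bedrock) leave unset, so the metric
        # was never emitted for them.
        and kwargs.get("stream", False) is True
    ):
        ttft = (completion_start_time - api_call_start_time).total_seconds()
        ttft_metric.labels(model=kwargs.get("model", "unknown")).observe(ttft)


# Usage mirroring the updated unit test: a streaming request whose first
# chunk arrived 0.5s after the API call started.
now = datetime.now()
set_latency_metrics_sketch(
    {
        "model": "bedrock/anthropic.claude-3",
        "stream": True,
        "api_call_start_time": now - timedelta(seconds=1.5),
        "completion_start_time": now - timedelta(seconds=1),
    },
    _FakeTTFTMetric(),
)
# -> ttft observed for {'model': 'bedrock/anthropic.claude-3'}: 0.500s
```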