From 9daa6fb0b4a882f1dd08b11a377cc8fb99b6c310 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 13 Jan 2025 20:16:34 -0800
Subject: [PATCH] (prometheus - minor bug fix) - `litellm_llm_api_time_to_first_token_metric` not populating for bedrock models (#7740)

* fix prometheus ttft
* fix test_set_latency_metrics
* fix _set_latency_metrics
* fix _set_latency_metrics
* fix test_set_latency_metrics
* test_async_log_success_event
* huggingface/mistralai/Mistral-7B-Instruct-v0.3
---
 litellm/integrations/prometheus.py                        | 8 +-------
 .../logging_callback_tests/test_prometheus_unit_tests.py  | 4 ++--
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 3409f3a799..9ca3c48cc4 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
             # why type ignore below?
             # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
             # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-            standard_logging_payload=standard_logging_payload,  # type: ignore
             enum_values=enum_values,
         )

@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
         user_api_key_alias: Optional[str],
         user_api_team: Optional[str],
         user_api_team_alias: Optional[str],
-        standard_logging_payload: StandardLoggingPayload,
         enum_values: UserAPIKeyLabelValues,
     ):
         # latency metrics
-        model_parameters: dict = standard_logging_payload["model_parameters"]
         end_time: datetime = kwargs.get("end_time") or datetime.now()
         start_time: Optional[datetime] = kwargs.get("start_time")
         api_call_start_time = kwargs.get("api_call_start_time", None)
-
         completion_start_time = kwargs.get("completion_start_time", None)
-
         if (
             completion_start_time is not None
             and isinstance(completion_start_time, datetime)
-            and model_parameters.get("stream")
-            is True  # only emit for streaming requests
+            and kwargs.get("stream", False) is True  # only emit for streaming requests
         ):
             time_to_first_token_seconds = (
                 completion_start_time - api_call_start_time
diff --git a/tests/logging_callback_tests/test_prometheus_unit_tests.py b/tests/logging_callback_tests/test_prometheus_unit_tests.py
index cd333c7bfc..94b3164a26 100644
--- a/tests/logging_callback_tests/test_prometheus_unit_tests.py
+++ b/tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
     standard_logging_object = create_standard_logging_payload()
     kwargs = {
         "model": "gpt-3.5-turbo",
+        "stream": True,
         "litellm_params": {
             "metadata": {
                 "user_api_key": "test_key",
@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
     time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
     """
     standard_logging_payload = create_standard_logging_payload()
-    standard_logging_payload["model_parameters"] = {"stream": True}
     prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
     prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
     prometheus_logger.litellm_request_total_latency_metric = MagicMock()
@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
         "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
         "completion_start_time": now
         - timedelta(seconds=1),  # when the completion starts
+        "stream": True,
     }

     prometheus_logger._set_latency_metrics(
@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
         user_api_key_alias="alias1",
         user_api_team="team1",
         user_api_team_alias="team_alias1",
-        standard_logging_payload=standard_logging_payload,
         enum_values=enum_values,
     )
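
For reviewers who want the behavior change in isolation: below is a minimal, runnable sketch of the post-fix time-to-first-token gating. `set_latency_metrics_sketch` and `_FakeTTFTMetric` are hypothetical stand-ins invented for this example and are not litellm APIs; the real `PrometheusLogger._set_latency_metrics` also records LLM API latency and total request latency, which this sketch omits.

```python
# Hedged sketch only: `set_latency_metrics_sketch` and `_FakeTTFTMetric`
# are illustrative stand-ins, not litellm code.
from datetime import datetime, timedelta
from typing import Optional


class _FakeTTFTMetric:
    """Stand-in for the Prometheus histogram the real logger uses."""

    def labels(self, **label_values) -> "_FakeTTFTMetric":
        self._labels = label_values
        return self

    def observe(self, value: float) -> None:
        print(f"ttft observed for {self._labels}: {value:.3f}s")


def set_latency_metrics_sketch(kwargs: dict, ttft_metric: _FakeTTFTMetric) -> None:
    api_call_start_time: Optional[datetime] = kwargs.get("api_call_start_time")
    completion_start_time = kwargs.get("completion_start_time")
    if (
        completion_start_time is not None
        and isinstance(completion_start_time, datetime)
        and api_call_start_time is not None
        # The fix: read "stream" from the top-level kwargs. The old check
        # read standard_logging_payload["model_parameters"]["stream"],
        # which some providers (e.g. Bedrock) leave unset, so the metric
        # was never emitted for them.
        and kwargs.get("stream", False) is True
    ):
        ttft = (completion_start_time - api_call_start_time).total_seconds()
        ttft_metric.labels(model=kwargs.get("model", "unknown")).observe(ttft)


# Usage mirroring the updated unit test: a streaming request whose first
# chunk arrived 0.5s after the API call started.
now = datetime.now()
set_latency_metrics_sketch(
    {
        "model": "bedrock/anthropic.claude-3",
        "stream": True,
        "api_call_start_time": now - timedelta(seconds=1.5),
        "completion_start_time": now - timedelta(seconds=1),
    },
    _FakeTTFTMetric(),
)
# -> ttft observed for {'model': 'bedrock/anthropic.claude-3'}: 0.500s
```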