(prometheus - minor bug fix) - litellm_llm_api_time_to_first_token_metric not populating for bedrock models (#7740)

* fix prometheus ttft

* fix test_set_latency_metrics

* fix _set_latency_metrics

* fix _set_latency_metrics

* fix test_set_latency_metrics

* test_async_log_success_event

* huggingface/mistralai/Mistral-7B-Instruct-v0.3
This commit is contained in:
Ishaan Jaff 2025-01-13 20:16:34 -08:00 committed by GitHub
parent d88f01d518
commit 9daa6fb0b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 3 additions and 9 deletions

View file

@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
     # why type ignore below?
     # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
     # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-    standard_logging_payload=standard_logging_payload,  # type: ignore
     enum_values=enum_values,
 )
@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
     user_api_key_alias: Optional[str],
     user_api_team: Optional[str],
     user_api_team_alias: Optional[str],
-    standard_logging_payload: StandardLoggingPayload,
     enum_values: UserAPIKeyLabelValues,
 ):
     # latency metrics
-    model_parameters: dict = standard_logging_payload["model_parameters"]
     end_time: datetime = kwargs.get("end_time") or datetime.now()
     start_time: Optional[datetime] = kwargs.get("start_time")
     api_call_start_time = kwargs.get("api_call_start_time", None)
     completion_start_time = kwargs.get("completion_start_time", None)
     if (
         completion_start_time is not None
         and isinstance(completion_start_time, datetime)
-        and model_parameters.get("stream")
-        is True  # only emit for streaming requests
+        and kwargs.get("stream", False) is True  # only emit for streaming requests
     ):
         time_to_first_token_seconds = (
             completion_start_time - api_call_start_time

View file

@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
     standard_logging_object = create_standard_logging_payload()
     kwargs = {
         "model": "gpt-3.5-turbo",
+        "stream": True,
         "litellm_params": {
             "metadata": {
                 "user_api_key": "test_key",
@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
     time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
     """
     standard_logging_payload = create_standard_logging_payload()
-    standard_logging_payload["model_parameters"] = {"stream": True}
     prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
     prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
     prometheus_logger.litellm_request_total_latency_metric = MagicMock()
@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
     "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
     "completion_start_time": now
     - timedelta(seconds=1),  # when the completion starts
+    "stream": True,
 }
 prometheus_logger._set_latency_metrics(
prometheus_logger._set_latency_metrics( prometheus_logger._set_latency_metrics(
@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
     user_api_key_alias="alias1",
     user_api_team="team1",
     user_api_team_alias="team_alias1",
-    standard_logging_payload=standard_logging_payload,
     enum_values=enum_values,
 )