(prometheus - minor bug fix) - litellm_llm_api_time_to_first_token_metric not populating for bedrock models (#7740)

* fix prometheus ttft

* fix test_set_latency_metrics

* fix _set_latency_metrics

* fix _set_latency_metrics

* fix test_set_latency_metrics

* test_async_log_success_event

* huggingface/mistralai/Mistral-7B-Instruct-v0.3
This commit is contained in:
Ishaan Jaff 2025-01-13 20:16:34 -08:00 committed by GitHub
parent d88f01d518
commit 9daa6fb0b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 3 additions and 9 deletions

View file

@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
# why type ignore below?
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
standard_logging_payload=standard_logging_payload, # type: ignore
enum_values=enum_values,
)
@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
user_api_key_alias: Optional[str],
user_api_team: Optional[str],
user_api_team_alias: Optional[str],
standard_logging_payload: StandardLoggingPayload,
enum_values: UserAPIKeyLabelValues,
):
# latency metrics
model_parameters: dict = standard_logging_payload["model_parameters"]
end_time: datetime = kwargs.get("end_time") or datetime.now()
start_time: Optional[datetime] = kwargs.get("start_time")
api_call_start_time = kwargs.get("api_call_start_time", None)
completion_start_time = kwargs.get("completion_start_time", None)
if (
completion_start_time is not None
and isinstance(completion_start_time, datetime)
and model_parameters.get("stream")
is True # only emit for streaming requests
and kwargs.get("stream", False) is True # only emit for streaming requests
):
time_to_first_token_seconds = (
completion_start_time - api_call_start_time

View file

@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
standard_logging_object = create_standard_logging_payload()
kwargs = {
"model": "gpt-3.5-turbo",
"stream": True,
"litellm_params": {
"metadata": {
"user_api_key": "test_key",
@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
"""
standard_logging_payload = create_standard_logging_payload()
standard_logging_payload["model_parameters"] = {"stream": True}
prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
prometheus_logger.litellm_request_total_latency_metric = MagicMock()
@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
"api_call_start_time": now - timedelta(seconds=1.5), # when the api call starts
"completion_start_time": now
- timedelta(seconds=1), # when the completion starts
"stream": True,
}
prometheus_logger._set_latency_metrics(
@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
user_api_key_alias="alias1",
user_api_team="team1",
user_api_team_alias="team_alias1",
standard_logging_payload=standard_logging_payload,
enum_values=enum_values,
)