mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
(prometheus - minor bug fix) - litellm_llm_api_time_to_first_token_metric
not populating for bedrock models (#7740)
* fix prometheus ttft
* fix test_set_latency_metrics
* fix _set_latency_metrics
* fix _set_latency_metrics
* fix test_set_latency_metrics
* test_async_log_success_event
* huggingface/mistralai/Mistral-7B-Instruct-v0.3
This commit is contained in:
parent
d88f01d518
commit
9daa6fb0b4
2 changed files with 3 additions and 9 deletions
|
@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
|
|||
# why type ignore below?
|
||||
# 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
|
||||
# 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
|
||||
standard_logging_payload=standard_logging_payload, # type: ignore
|
||||
enum_values=enum_values,
|
||||
)
|
||||
|
||||
|
@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
|
|||
user_api_key_alias: Optional[str],
|
||||
user_api_team: Optional[str],
|
||||
user_api_team_alias: Optional[str],
|
||||
standard_logging_payload: StandardLoggingPayload,
|
||||
enum_values: UserAPIKeyLabelValues,
|
||||
):
|
||||
# latency metrics
|
||||
model_parameters: dict = standard_logging_payload["model_parameters"]
|
||||
end_time: datetime = kwargs.get("end_time") or datetime.now()
|
||||
start_time: Optional[datetime] = kwargs.get("start_time")
|
||||
api_call_start_time = kwargs.get("api_call_start_time", None)
|
||||
|
||||
completion_start_time = kwargs.get("completion_start_time", None)
|
||||
|
||||
if (
|
||||
completion_start_time is not None
|
||||
and isinstance(completion_start_time, datetime)
|
||||
and model_parameters.get("stream")
|
||||
is True # only emit for streaming requests
|
||||
and kwargs.get("stream", False) is True # only emit for streaming requests
|
||||
):
|
||||
time_to_first_token_seconds = (
|
||||
completion_start_time - api_call_start_time
|
||||
|
|
|
@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
|
|||
standard_logging_object = create_standard_logging_payload()
|
||||
kwargs = {
|
||||
"model": "gpt-3.5-turbo",
|
||||
"stream": True,
|
||||
"litellm_params": {
|
||||
"metadata": {
|
||||
"user_api_key": "test_key",
|
||||
|
@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
|
|||
time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
|
||||
"""
|
||||
standard_logging_payload = create_standard_logging_payload()
|
||||
standard_logging_payload["model_parameters"] = {"stream": True}
|
||||
prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
|
||||
prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
|
||||
prometheus_logger.litellm_request_total_latency_metric = MagicMock()
|
||||
|
@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
|
|||
"api_call_start_time": now - timedelta(seconds=1.5), # when the api call starts
|
||||
"completion_start_time": now
|
||||
- timedelta(seconds=1), # when the completion starts
|
||||
"stream": True,
|
||||
}
|
||||
|
||||
prometheus_logger._set_latency_metrics(
|
||||
|
@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
|
|||
user_api_key_alias="alias1",
|
||||
user_api_team="team1",
|
||||
user_api_team_alias="team_alias1",
|
||||
standard_logging_payload=standard_logging_payload,
|
||||
enum_values=enum_values,
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue