mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
(prometheus - minor bug fix) - litellm_llm_api_time_to_first_token_metric not populating for bedrock models (#7740)
* fix prometheus ttft
* fix test_set_latency_metrics
* fix _set_latency_metrics
* fix _set_latency_metrics
* fix test_set_latency_metrics
* test_async_log_success_event
* huggingface/mistralai/Mistral-7B-Instruct-v0.3
parent d88f01d518
commit 9daa6fb0b4

2 changed files with 3 additions and 9 deletions
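
Why the metric never populated: for Bedrock (and other non-OpenAI providers) the stream flag does not reliably end up in standard_logging_payload["model_parameters"], so the old guard in _set_latency_metrics evaluated false and the time-to-first-token histogram was never observed. The fix reads the flag from the logging kwargs instead, which is populated for every provider. A minimal sketch of the fixed guard as a standalone helper (the helper name ttft_seconds is illustrative, not part of the repo; field names follow the diff below):

from datetime import datetime
from typing import Optional

def ttft_seconds(kwargs: dict) -> Optional[float]:
    # Illustrative helper mirroring the fixed guard: only emit for
    # streaming requests, and read the stream flag from kwargs rather
    # than from standard_logging_payload["model_parameters"], which
    # some providers (e.g. Bedrock) leave unset.
    api_call_start_time = kwargs.get("api_call_start_time", None)
    completion_start_time = kwargs.get("completion_start_time", None)
    if (
        isinstance(completion_start_time, datetime)
        and isinstance(api_call_start_time, datetime)
        and kwargs.get("stream", False) is True
    ):
        return (completion_start_time - api_call_start_time).total_seconds()
    return None

For a Bedrock streaming call, kwargs carries stream=True at the top level, so the guard now fires; under the old check it depended on model_parameters, which Bedrock requests leave empty.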
@@ -449,7 +449,6 @@ class PrometheusLogger(CustomLogger):
             # why type ignore below?
             # 1. We just checked if isinstance(standard_logging_payload, dict). Pyright complains.
             # 2. Pyright does not allow us to run isinstance(standard_logging_payload, StandardLoggingPayload) <- this would be ideal
-            standard_logging_payload=standard_logging_payload,  # type: ignore
             enum_values=enum_values,
         )

@@ -626,22 +625,17 @@ class PrometheusLogger(CustomLogger):
         user_api_key_alias: Optional[str],
         user_api_team: Optional[str],
         user_api_team_alias: Optional[str],
-        standard_logging_payload: StandardLoggingPayload,
         enum_values: UserAPIKeyLabelValues,
     ):
         # latency metrics
-        model_parameters: dict = standard_logging_payload["model_parameters"]
         end_time: datetime = kwargs.get("end_time") or datetime.now()
         start_time: Optional[datetime] = kwargs.get("start_time")
         api_call_start_time = kwargs.get("api_call_start_time", None)

         completion_start_time = kwargs.get("completion_start_time", None)

         if (
             completion_start_time is not None
             and isinstance(completion_start_time, datetime)
-            and model_parameters.get("stream")
-            is True  # only emit for streaming requests
+            and kwargs.get("stream", False) is True  # only emit for streaming requests
         ):
             time_to_first_token_seconds = (
                 completion_start_time - api_call_start_time
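
Just below this hunk (not shown in the diff), the delta is converted to seconds and recorded. A hedged sketch of that emission, assuming litellm_llm_api_time_to_first_token_metric is a prometheus_client Histogram and assuming the label set shown here (the exact labels are not visible in this diff):

# Sketch only: continuation of the guarded branch above, inside the method.
time_to_first_token_seconds = (
    completion_start_time - api_call_start_time
).total_seconds()
self.litellm_llm_api_time_to_first_token_metric.labels(
    model,
    user_api_key,
    user_api_key_alias,
    user_api_team,
    user_api_team_alias,
).observe(time_to_first_token_seconds)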
@@ -112,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
     standard_logging_object = create_standard_logging_payload()
     kwargs = {
         "model": "gpt-3.5-turbo",
+        "stream": True,
         "litellm_params": {
             "metadata": {
                 "user_api_key": "test_key",

@@ -298,7 +299,6 @@ def test_set_latency_metrics(prometheus_logger):
     time to first token, llm api latency, and request total latency metrics are set to the values in the standard logging payload
     """
     standard_logging_payload = create_standard_logging_payload()
-    standard_logging_payload["model_parameters"] = {"stream": True}
     prometheus_logger.litellm_llm_api_time_to_first_token_metric = MagicMock()
     prometheus_logger.litellm_llm_api_latency_metric = MagicMock()
     prometheus_logger.litellm_request_total_latency_metric = MagicMock()

@@ -322,6 +322,7 @@ def test_set_latency_metrics(prometheus_logger):
         "api_call_start_time": now - timedelta(seconds=1.5),  # when the api call starts
         "completion_start_time": now
         - timedelta(seconds=1),  # when the completion starts
+        "stream": True,
     }

     prometheus_logger._set_latency_metrics(

@@ -331,7 +332,6 @@ def test_set_latency_metrics(prometheus_logger):
         user_api_key_alias="alias1",
         user_api_team="team1",
         user_api_team_alias="team_alias1",
-        standard_logging_payload=standard_logging_payload,
         enum_values=enum_values,
     )
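
With stream now set in kwargs, api_call_start_time at now - 1.5s, and completion_start_time at now - 1s, the expected TTFT observation is 0.5 seconds. A sketch of assertions test_set_latency_metrics can make against the MagicMock (the model and key label values are assumptions; only the alias/team values are visible in this diff):

# Hypothetical assertions; "gpt-3.5-turbo" and "key1" are assumed values.
ttft_metric = prometheus_logger.litellm_llm_api_time_to_first_token_metric
ttft_metric.labels.assert_called_once_with(
    "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
)
ttft_metric.labels.return_value.observe.assert_called_once_with(0.5)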