mirror of https://github.com/BerriAI/litellm.git

temp - show better debug logs for lowest latency

parent d4d81dce01
commit 3b9d6dfc47

2 changed files with 13 additions and 6 deletions
litellm/integrations/slack_alerting.py
@@ -140,6 +140,7 @@ class SlackAlerting:
             raise e

     def _get_deployment_latencies_to_alert(self, metadata=None):
+        print("inside get deployment latencies metadata", metadata)  # noqa

         if metadata is None:
             return None
@@ -188,6 +189,7 @@ class SlackAlerting:
                 request_info=request_info, kwargs=kwargs
             )
             # add deployment latencies to alert
+            print("in response taking too long callback, kwargs: ", kwargs)  # noqa
             if (
                 kwargs is not None
                 and "litellm_params" in kwargs
@@ -281,6 +283,10 @@ class SlackAlerting:
                     f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
                 )

+                print(
+                    "inside hanging request callback, request_data: ", request_data
+                )  # noqa
+
                 # add deployment latencies to alert
                 _deployment_latency_map = self._get_deployment_latencies_to_alert(
                     metadata=request_data.get("metadata", {})
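All three prints above trace the metadata that feeds `_get_deployment_latencies_to_alert`, which turns the `_latency_per_deployment` map (written by the router strategy in the second file below) into alert content. A minimal reader-side sketch, assuming the method does little more than guard for missing metadata and pull out that key; only the print and the None-guard appear in the diff, the rest is illustrative:

    # hypothetical sketch of the metadata flow, not the repo's exact method body
    def get_deployment_latencies_to_alert(metadata=None):
        print("inside get deployment latencies metadata", metadata)  # noqa
        if metadata is None:
            return None
        # assumption: the latency strategy stashed its per-deployment map here
        return metadata.get("_latency_per_deployment")

    # usage, with the metadata key the lowest_latency hunks below write
    sample = {"_latency_per_deployment": {"https://example/api-base": 0.42}}
    print(get_deployment_latencies_to_alert(sample))  # {'https://example/api-base': 0.42}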
litellm/router_strategy/lowest_latency.py
@@ -339,12 +339,19 @@ class LowestLatencyLoggingHandler(CustomLogger):
                 item_rpm = item_map.get(precise_minute, {}).get("rpm", 0)
                 item_tpm = item_map.get(precise_minute, {}).get("tpm", 0)

+                # _latency_per_deployment is used for debugging
+                _deployment_api_base = _deployment.get("litellm_params", {}).get(
+                    "api_base", ""
+                )
+
                 # get average latency
                 total: float = 0.0
                 for _call_latency in item_latency:
                     if isinstance(_call_latency, float):
                         total += _call_latency
                 item_latency = total / len(item_latency)
+                print("item_latency=", item_latency, "deployment=", deployment)  # noqa
+                _latency_per_deployment[_deployment_api_base] = item_latency
                 if item_latency == 0:
                     deployment = _deployment
                     break
@@ -356,12 +363,6 @@ class LowestLatencyLoggingHandler(CustomLogger):
                 elif item_latency < lowest_latency:
                     lowest_latency = item_latency
                     deployment = _deployment
-
-                # _latency_per_deployment is used for debugging
-                _deployment_api_base = _deployment.get("litellm_params", {}).get(
-                    "api_base", ""
-                )
-                _latency_per_deployment[_deployment_api_base] = item_latency
         if request_kwargs is not None and "metadata" in request_kwargs:
             request_kwargs["metadata"][
                 "_latency_per_deployment"
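Taken together, the two lowest_latency.py hunks move the `_latency_per_deployment` bookkeeping up so an entry is recorded for every deployment considered, not just the eventual winner, and add a per-deployment debug print. A standalone sketch of the selection loop being instrumented, with invented sample data (names follow the diff; the real loop reads per-minute latencies out of the router's cache):

    # simplified sketch of lowest-latency selection; sample data is invented
    latencies = {
        "https://deployment-a": [0.42, 0.51, 0.47],
        "https://deployment-b": [0.31, 0.29],
    }

    _latency_per_deployment = {}  # api_base -> average latency, kept for debugging
    deployment, lowest_latency = None, float("inf")

    for api_base, calls in latencies.items():
        # average the recorded call latencies, mirroring the diff's inner loop
        total = sum(c for c in calls if isinstance(c, float))
        item_latency = total / len(calls)
        _latency_per_deployment[api_base] = item_latency
        if item_latency == 0:  # an unused deployment wins immediately
            deployment = api_base
            break
        if item_latency < lowest_latency:
            lowest_latency = item_latency
            deployment = api_base

    print("picked:", deployment)  # picks deployment-b (avg ~0.30)

Storing the finished map on request metadata (the trailing context lines above) is what lets SlackAlerting's hanging-request alert report per-deployment latencies.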