diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 64f9b53845..46eba3a23a 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -140,6 +140,7 @@ class SlackAlerting:
             raise e
 
     def _get_deployment_latencies_to_alert(self, metadata=None):
+        print("inside get deployment latencies metadata", metadata)  # noqa
         if metadata is None:
             return None
 
@@ -188,6 +189,7 @@ class SlackAlerting:
                    request_info=request_info, kwargs=kwargs
                )
            # add deployment latencies to alert
+            print("in response taking too long callback, kwargs: ", kwargs)  # noqa
            if (
                kwargs is not None
                and "litellm_params" in kwargs
@@ -281,6 +283,10 @@ class SlackAlerting:
                    f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
                )
 
+                print(
+                    "inside hanging request callback, request_data: ", request_data
+                )  # noqa
+
                # add deployment latencies to alert
                _deployment_latency_map = self._get_deployment_latencies_to_alert(
                    metadata=request_data.get("metadata", {})
diff --git a/litellm/router_strategy/lowest_latency.py b/litellm/router_strategy/lowest_latency.py
index 221a666dca..998336fa12 100644
--- a/litellm/router_strategy/lowest_latency.py
+++ b/litellm/router_strategy/lowest_latency.py
@@ -339,12 +339,19 @@ class LowestLatencyLoggingHandler(CustomLogger):
            item_rpm = item_map.get(precise_minute, {}).get("rpm", 0)
            item_tpm = item_map.get(precise_minute, {}).get("tpm", 0)
 
+            # _latency_per_deployment is used for debugging
+            _deployment_api_base = _deployment.get("litellm_params", {}).get(
+                "api_base", ""
+            )
+
            # get average latency
            total: float = 0.0
            for _call_latency in item_latency:
                if isinstance(_call_latency, float):
                    total += _call_latency
            item_latency = total / len(item_latency)
+            print("item_latency=", item_latency, "deployment=", deployment)  # noqa
+            _latency_per_deployment[_deployment_api_base] = item_latency
            if item_latency == 0:
                deployment = _deployment
                break
@@ -356,12 +363,6 @@ class LowestLatencyLoggingHandler(CustomLogger):
            elif item_latency < lowest_latency:
                lowest_latency = item_latency
                deployment = _deployment
-
-            # _latency_per_deployment is used for debuggig
-            _deployment_api_base = _deployment.get("litellm_params", {}).get(
-                "api_base", ""
-            )
-            _latency_per_deployment[_deployment_api_base] = item_latency
        if request_kwargs is not None and "metadata" in request_kwargs:
            request_kwargs["metadata"][
                "_latency_per_deployment"