diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index ca58371f4..d8365404c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1863,6 +1863,8 @@ async def chat_completion( else: # router is not set response = await litellm.acompletion(**data) + # Post Call Processing + data["litellm_status"] = "success" # used for alerting if hasattr(response, "_hidden_params"): model_id = response._hidden_params.get("model_id", None) or "" else: @@ -2048,6 +2050,7 @@ async def embeddings( response = await litellm.aembedding(**data) ### ALERTING ### + data["litellm_status"] = "success" # used for alerting end_time = time.time() asyncio.create_task( proxy_logging_obj.response_taking_too_long( @@ -2163,6 +2166,7 @@ async def image_generation( response = await litellm.aimage_generation(**data) ### ALERTING ### + data["litellm_status"] = "success" # used for alerting end_time = time.time() asyncio.create_task( proxy_logging_obj.response_taking_too_long( diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index ebc2dbc05..d638d162d 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -158,14 +158,17 @@ class ProxyLogging: await asyncio.sleep( self.alerting_threshold ) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests - - alerting_message = ( - f"Requests are hanging - {self.alerting_threshold}s+ request time" - ) - await self.alerting_handler( - message=alerting_message + request_info, - level="Medium", - ) + if ( + request_data is not None + and request_data.get("litellm_status", "") != "success" + ): + alerting_message = ( + f"Requests are hanging - {self.alerting_threshold}s+ request time" + ) + await self.alerting_handler( + message=alerting_message + request_info, + level="Medium", + ) elif ( type == "slow_response" and start_time is not None and end_time is not None