(fix) only alert users when requests are hanging

2025-04-25 18:54:30 +00:00 · 2024-01-24 15:58:07 -08:00 · 2024-01-24 15:58:07 -08:00 · b993c62144
commit b993c62144
parent 6c13776701
2 changed files with 15 additions and 8 deletions
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -1863,6 +1863,8 @@ async def chat_completion(
        else:  # router is not set
            response = await litellm.acompletion(**data)

+        # Post Call Processing
+        data["litellm_status"] = "success"  # used for alerting
        if hasattr(response, "_hidden_params"):
            model_id = response._hidden_params.get("model_id", None) or ""
        else:
@ -2048,6 +2050,7 @@ async def embeddings(
            response = await litellm.aembedding(**data)

        ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
        end_time = time.time()
        asyncio.create_task(
            proxy_logging_obj.response_taking_too_long(
@ -2163,6 +2166,7 @@ async def image_generation(
            response = await litellm.aimage_generation(**data)

        ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
        end_time = time.time()
        asyncio.create_task(
            proxy_logging_obj.response_taking_too_long(
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -158,7 +158,10 @@ class ProxyLogging:
            await asyncio.sleep(
                self.alerting_threshold
            )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
-
+            if (
+                request_data is not None
+                and request_data.get("litellm_status", "") != "success"
+            ):
                alerting_message = (
                    f"Requests are hanging - {self.alerting_threshold}s+ request time"
                )