Merge pull request #1601 from BerriAI/litellm_improve_slack_alertign

[Feat] Proxy - Improve Slack Alerting
This commit is contained in:
Ishaan Jaff 2024-01-24 16:43:23 -08:00 committed by GitHub
commit 6dac4ab8aa
4 changed files with 50 additions and 10 deletions

View file

@@ -7,8 +7,11 @@ handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG) handler.setLevel(logging.DEBUG)
# Create a formatter and set it for the handler # Create a formatter and set it for the handler
formatter = logging.Formatter(
"\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
datefmt="%H:%M:%S",
)
formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
handler.setFormatter(formatter) handler.setFormatter(formatter)

View file

@@ -67,6 +67,8 @@ litellm_settings:
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
alerting: ["slack"]
alerting_threshold: 10 # sends alerts if requests hang for 10 seconds
# database_type: "dynamo_db" # database_type: "dynamo_db"
# database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190 # database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
# "billing_mode": "PAY_PER_REQUEST", # "billing_mode": "PAY_PER_REQUEST",

View file

@@ -1899,6 +1899,8 @@ async def chat_completion(
else: # router is not set else: # router is not set
response = await litellm.acompletion(**data) response = await litellm.acompletion(**data)
# Post Call Processing
data["litellm_status"] = "success" # used for alerting
if hasattr(response, "_hidden_params"): if hasattr(response, "_hidden_params"):
model_id = response._hidden_params.get("model_id", None) or "" model_id = response._hidden_params.get("model_id", None) or ""
else: else:
@@ -2084,6 +2086,7 @@ async def embeddings(
response = await litellm.aembedding(**data) response = await litellm.aembedding(**data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting
end_time = time.time() end_time = time.time()
asyncio.create_task( asyncio.create_task(
proxy_logging_obj.response_taking_too_long( proxy_logging_obj.response_taking_too_long(
@@ -2199,6 +2202,7 @@ async def image_generation(
response = await litellm.aimage_generation(**data) response = await litellm.aimage_generation(**data)
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting
end_time = time.time() end_time = time.time()
asyncio.create_task( asyncio.create_task(
proxy_logging_obj.response_taking_too_long( proxy_logging_obj.response_taking_too_long(

View file

@@ -97,7 +97,7 @@ class ProxyLogging:
3. /image/generation 3. /image/generation
""" """
### ALERTING ### ### ALERTING ###
asyncio.create_task(self.response_taking_too_long()) asyncio.create_task(self.response_taking_too_long(request_data=data))
try: try:
for callback in litellm.callbacks: for callback in litellm.callbacks:
@@ -137,24 +137,47 @@ class ProxyLogging:
start_time: Optional[float] = None, start_time: Optional[float] = None,
end_time: Optional[float] = None, end_time: Optional[float] = None,
type: Literal["hanging_request", "slow_response"] = "hanging_request", type: Literal["hanging_request", "slow_response"] = "hanging_request",
request_data: Optional[dict] = None,
): ):
if request_data is not None:
model = request_data.get("model", "")
messages = request_data.get("messages", "")
# try casting messages to str and get the first 10000 characters, else mark as None
try:
messages = str(messages)
messages = messages[:10000]
except:
messages = None
request_info = f"\nRequest Model: {model}\nMessages: {messages}"
else:
request_info = ""
if type == "hanging_request": if type == "hanging_request":
# Simulate a long-running operation that could take more than 5 minutes # Simulate a long-running operation that could take more than 5 minutes
await asyncio.sleep( await asyncio.sleep(
self.alerting_threshold self.alerting_threshold
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests ) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
if (
await self.alerting_handler( request_data is not None
message=f"Requests are hanging - {self.alerting_threshold}s+ request time", and request_data.get("litellm_status", "") != "success"
level="Medium", ):
) # only alert hanging responses if they have not been marked as success
alerting_message = (
f"Requests are hanging - {self.alerting_threshold}s+ request time"
)
await self.alerting_handler(
message=alerting_message + request_info,
level="Medium",
)
elif ( elif (
type == "slow_response" and start_time is not None and end_time is not None type == "slow_response" and start_time is not None and end_time is not None
): ):
slow_message = f"Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s"
if end_time - start_time > self.alerting_threshold: if end_time - start_time > self.alerting_threshold:
await self.alerting_handler( await self.alerting_handler(
message=f"Responses are slow - {round(end_time-start_time,2)}s response time", message=slow_message + request_info,
level="Low", level="Low",
) )
@@ -173,7 +196,13 @@ class ProxyLogging:
level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'. level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
message: str - what is the alert about message: str - what is the alert about
""" """
formatted_message = f"Level: {level}\n\nMessage: {message}" from datetime import datetime
# Get the current timestamp
current_time = datetime.now().strftime("%H:%M:%S")
formatted_message = (
f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"
)
if self.alerting is None: if self.alerting is None:
return return
@@ -184,7 +213,9 @@ class ProxyLogging:
raise Exception("Missing SLACK_WEBHOOK_URL from environment") raise Exception("Missing SLACK_WEBHOOK_URL from environment")
payload = {"text": formatted_message} payload = {"text": formatted_message}
headers = {"Content-type": "application/json"} headers = {"Content-type": "application/json"}
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(
connector=aiohttp.TCPConnector(ssl=False)
) as session:
async with session.post( async with session.post(
slack_webhook_url, json=payload, headers=headers slack_webhook_url, json=payload, headers=headers
) as response: ) as response: