From 3fe475c9cedf988ee6ade9e4788e57a0f27b3fd9 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 14:55:21 -0800
Subject: [PATCH 1/6] (feat) slack alerting - log request/response

---
 litellm/proxy/utils.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 978355568c..222a21592e 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -97,7 +97,7 @@ class ProxyLogging:
         3. /image/generation
         """
         ### ALERTING ###
-        asyncio.create_task(self.response_taking_too_long())
+        asyncio.create_task(self.response_taking_too_long(request_data=data))

         try:
             for callback in litellm.callbacks:
@@ -137,6 +137,8 @@ class ProxyLogging:
         start_time: Optional[float] = None,
         end_time: Optional[float] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
+        request_data: Optional[dict] = None,
+        response_obj: Optional[litellm.ModelResponse] = None,
     ):
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests

+            alerting_message = (
+                f"Requests are hanging - {self.alerting_threshold}s+ request time"
+            )
             await self.alerting_handler(
-                message=f"Requests are hanging - {self.alerting_threshold}s+ request time",
+                message=alerting_message
+                + f"\nRequest: {request_data}\nResponse: {response_obj}",
                 level="Medium",
             )

@@ -184,7 +190,9 @@ class ProxyLogging:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
         headers = {"Content-type": "application/json"}
-        async with aiohttp.ClientSession() as session:
+        async with aiohttp.ClientSession(
+            connector=aiohttp.TCPConnector(ssl=False)
+        ) as session:
             async with session.post(
                 slack_webhook_url, json=payload, headers=headers
             ) as response:

From 47797b09f779810c08ef722a89fc216db987e521 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:16:18 -0800
Subject: [PATCH 2/6] (feat) proxy - add timestamp to debug logs

---
 litellm/_logging.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/_logging.py b/litellm/_logging.py
index e9a4a99cd1..d06d8cb6f1 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -7,8 +7,11 @@
 handler = logging.StreamHandler()
 handler.setLevel(logging.DEBUG)

 # Create a formatter and set it for the handler
+formatter = logging.Formatter(
+    "\033[92m%(asctime)s - %(name)s - %(levelname)s\033[0m: %(message)s",
+    datefmt="%H:%M:%S",
+)

-formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
 handler.setFormatter(formatter)

From 44718e59e93df60f01508246b9b26c128d346b8e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:17:33 -0800
Subject: [PATCH 3/6] (feat) add request_info to slack alerts

---
 litellm/proxy/utils.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 222a21592e..0520954972 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -138,8 +138,20 @@ class ProxyLogging:
         end_time: Optional[float] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
         request_data: Optional[dict] = None,
-        response_obj: Optional[litellm.ModelResponse] = None,
     ):
+        if request_data is not None:
+            model = request_data.get("model", "")
+            messages = request_data.get("messages", "")
+            # try casting messages to str and get the first 10000 characters, else mark as None
+            try:
+                messages = str(messages)
+                messages = messages[:10000]
+            except:
+                messages = None
+
+            request_info = f"\nRequest Model: {model}\nMessages: {messages}"
+        else:
+            request_info = ""
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests

             alerting_message = (
                 f"Requests are hanging - {self.alerting_threshold}s+ request time"
             )
             await self.alerting_handler(
-                message=alerting_message
-                + f"\nRequest: {request_data}\nResponse: {response_obj}",
+                message=alerting_message + request_info,
                 level="Medium",
             )

         elif (
             type == "slow_response" and start_time is not None and end_time is not None
         ):
+            slow_message = (
+                f"Responses are slow - {round(end_time-start_time,2)}s response time"
+            )
             if end_time - start_time > self.alerting_threshold:
                 await self.alerting_handler(
-                    message=f"Responses are slow - {round(end_time-start_time,2)}s response time",
+                    message=slow_message + request_info,
                     level="Low",
                 )

From 0f51cd0baba2273a77a6de7334d4998469b9bb8e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:25:40 -0800
Subject: [PATCH 4/6] (fix) alerting - show timestamps in alert

---
 litellm/_logging.py    | 2 +-
 litellm/proxy/utils.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/litellm/_logging.py b/litellm/_logging.py
index d06d8cb6f1..438fa9743d 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -8,7 +8,7 @@
 handler.setLevel(logging.DEBUG)

 # Create a formatter and set it for the handler
 formatter = logging.Formatter(
-    "\033[92m%(asctime)s - %(name)s - %(levelname)s\033[0m: %(message)s",
+    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
     datefmt="%H:%M:%S",
 )
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 0520954972..ebc2dbc054 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -152,6 +152,7 @@ class ProxyLogging:
             request_info = f"\nRequest Model: {model}\nMessages: {messages}"
         else:
             request_info = ""
+
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
@@ -193,7 +194,13 @@ class ProxyLogging:
             level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
             message: str - what is the alert about
         """
-        formatted_message = f"Level: {level}\n\nMessage: {message}"
+        from datetime import datetime
+
+        # Get the current timestamp
+        current_time = datetime.now().strftime("%H:%M:%S")
+        formatted_message = (
+            f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"
+        )
         if self.alerting is None:
             return

From 2686d1f087f878c7302630fecbda9b5cbb9d5556 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:58:07 -0800
Subject: [PATCH 5/6] (fix) only alert users when requests are hanging

---
 litellm/proxy/proxy_server.py |  4 ++++
 litellm/proxy/utils.py        | 19 +++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ca58371f45..d8365404c6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1863,6 +1863,8 @@ async def chat_completion(
         else:  # router is not set
             response = await litellm.acompletion(**data)

+        # Post Call Processing
+        data["litellm_status"] = "success"  # used for alerting
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
         else:
@@ -2048,6 +2050,7 @@ async def embeddings(
             response = await litellm.aembedding(**data)

         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
         end_time = time.time()
         asyncio.create_task(
             proxy_logging_obj.response_taking_too_long(
@@ -2163,6 +2166,7 @@ async def image_generation(
         response = await litellm.aimage_generation(**data)

         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
         end_time = time.time()
         asyncio.create_task(
             proxy_logging_obj.response_taking_too_long(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index ebc2dbc054..d638d162d4 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -158,14 +158,17 @@ class ProxyLogging:
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
-
-            alerting_message = (
-                f"Requests are hanging - {self.alerting_threshold}s+ request time"
-            )
-            await self.alerting_handler(
-                message=alerting_message + request_info,
-                level="Medium",
-            )
+            if (
+                request_data is not None
+                and request_data.get("litellm_status", "") != "success"
+            ):
+                alerting_message = (
+                    f"Requests are hanging - {self.alerting_threshold}s+ request time"
+                )
+                await self.alerting_handler(
+                    message=alerting_message + request_info,
+                    level="Medium",
+                )

         elif (
             type == "slow_response" and start_time is not None and end_time is not None

From 2addde9279187ee5208a63e7a4afc42c6b4c683b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 16:07:46 -0800
Subject: [PATCH 6/6] (FIX) improve slack alerting messages

---
 litellm/proxy/proxy_config.yaml | 2 ++
 litellm/proxy/utils.py          | 5 ++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 97168b19f9..b06faac328 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -67,6 +67,8 @@ litellm_settings:

 general_settings:
   master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 10 # sends alerts if requests hang for 10 seconds
   # database_type: "dynamo_db"
   # database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
   #   "billing_mode": "PAY_PER_REQUEST",
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index d638d162d4..94e86600af 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -162,6 +162,7 @@ class ProxyLogging:
                 request_data is not None
                 and request_data.get("litellm_status", "") != "success"
             ):
+                # only alert hanging responses if they have not been marked as success
                 alerting_message = (
                     f"Requests are hanging - {self.alerting_threshold}s+ request time"
                 )
@@ -173,9 +174,7 @@ class ProxyLogging:
         elif (
             type == "slow_response" and start_time is not None and end_time is not None
         ):
-            slow_message = (
-                f"Responses are slow - {round(end_time-start_time,2)}s response time"
-            )
+            slow_message = f"Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s"
             if end_time - start_time > self.alerting_threshold:
                 await self.alerting_handler(
                     message=slow_message + request_info,
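
Note: taken together, these patches implement a simple watchdog pattern. For each request the proxy schedules a background task, sleeps for alerting_threshold seconds, and fires a Slack alert only if the request has not been marked litellm_status == "success" in the meantime; finished-but-slow responses get a separate low-severity alert. The standalone Python sketch below illustrates that pattern under stated assumptions. It is not litellm's actual API: send_slack_alert, hanging_request_watchdog, handle_request and fake_llm_call are illustrative names, and only the SLACK_WEBHOOK_URL environment variable, the "litellm_status" flag, and the alert wording are taken from the patches above.

import asyncio
import os
from datetime import datetime

import aiohttp

# Mirrors alerting_threshold in proxy_config.yaml: seconds a request may hang before we alert.
ALERTING_THRESHOLD = 10


async def send_slack_alert(level: str, message: str) -> None:
    # Format the alert with a timestamp and post it to a Slack incoming webhook,
    # the same way alerting_handler() does in the patches above.
    slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL")
    if slack_webhook_url is None:
        raise Exception("Missing SLACK_WEBHOOK_URL from environment")
    current_time = datetime.now().strftime("%H:%M:%S")
    payload = {"text": f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"}
    headers = {"Content-type": "application/json"}
    async with aiohttp.ClientSession() as session:
        async with session.post(slack_webhook_url, json=payload, headers=headers) as response:
            if response.status != 200:
                print(f"Error sending slack alert: {await response.text()}")


async def hanging_request_watchdog(request_data: dict) -> None:
    # Sleep for the threshold, then alert only if the request was never marked successful.
    await asyncio.sleep(ALERTING_THRESHOLD)
    if request_data.get("litellm_status", "") != "success":
        model = request_data.get("model", "")
        messages = str(request_data.get("messages", ""))[:10000]
        request_info = f"\nRequest Model: {model}\nMessages: {messages}"
        await send_slack_alert(
            level="Medium",
            message=f"Requests are hanging - {ALERTING_THRESHOLD}s+ request time" + request_info,
        )


async def fake_llm_call(request_data: dict) -> None:
    # Stand-in for the real completion call; make this sleep longer than
    # ALERTING_THRESHOLD (and set SLACK_WEBHOOK_URL) to see the alert fire.
    await asyncio.sleep(1)


async def handle_request(request_data: dict) -> None:
    # Schedule the watchdog before doing the work, then mark success so a
    # finished request never produces a hanging-request alert.
    watchdog = asyncio.create_task(hanging_request_watchdog(request_data))
    await fake_llm_call(request_data)
    request_data["litellm_status"] = "success"
    await watchdog  # fire-and-forget in the proxy; awaited here so the demo exits cleanly


if __name__ == "__main__":
    asyncio.run(handle_request({"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}))

The design choice worth noting is that the watchdog never has to be cancelled: setting litellm_status to "success" on the shared request dict is enough to keep a completed request quiet, so the request hot path needs no extra bookkeeping.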