From 3fe475c9cedf988ee6ade9e4788e57a0f27b3fd9 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 14:55:21 -0800
Subject: [PATCH 1/6] (feat) slack alerting - log request/response

---
 litellm/proxy/utils.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 978355568c..222a21592e 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -97,7 +97,7 @@ class ProxyLogging:
         3. /image/generation
         """
         ### ALERTING ###
-        asyncio.create_task(self.response_taking_too_long())
+        asyncio.create_task(self.response_taking_too_long(request_data=data))

         try:
             for callback in litellm.callbacks:
@@ -137,6 +137,8 @@ class ProxyLogging:
         start_time: Optional[float] = None,
         end_time: Optional[float] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
+        request_data: Optional[dict] = None,
+        response_obj: Optional[litellm.ModelResponse] = None,
     ):
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests

+            alerting_message = (
+                f"Requests are hanging - {self.alerting_threshold}s+ request time"
+            )
             await self.alerting_handler(
-                message=f"Requests are hanging - {self.alerting_threshold}s+ request time",
+                message=alerting_message
+                + f"\nRequest: {request_data}\nResponse: {response_obj}",
                 level="Medium",
             )

@@ -184,7 +190,9 @@ class ProxyLogging:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
         headers = {"Content-type": "application/json"}
-        async with aiohttp.ClientSession() as session:
+        async with aiohttp.ClientSession(
+            connector=aiohttp.TCPConnector(ssl=False)
+        ) as session:
             async with session.post(
                 slack_webhook_url, json=payload, headers=headers
             ) as response:

From 47797b09f779810c08ef722a89fc216db987e521 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:16:18 -0800
Subject: [PATCH 2/6] (feat) proxy - add timestamp to debug logs

---
 litellm/_logging.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/_logging.py b/litellm/_logging.py
index e9a4a99cd1..d06d8cb6f1 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -7,8 +7,11 @@
 handler = logging.StreamHandler()
 handler.setLevel(logging.DEBUG)

 # Create a formatter and set it for the handler
+formatter = logging.Formatter(
+    "\033[92m%(asctime)s - %(name)s - %(levelname)s\033[0m: %(message)s",
+    datefmt="%H:%M:%S",
+)

-formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
 handler.setFormatter(formatter)

From 44718e59e93df60f01508246b9b26c128d346b8e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:17:33 -0800
Subject: [PATCH 3/6] (feat) add request_info to slack alerts

---
 litellm/proxy/utils.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 222a21592e..0520954972 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -138,8 +138,20 @@ class ProxyLogging:
         end_time: Optional[float] = None,
         type: Literal["hanging_request", "slow_response"] = "hanging_request",
         request_data: Optional[dict] = None,
-        response_obj: Optional[litellm.ModelResponse] = None,
     ):
+        if request_data is not None:
+            model = request_data.get("model", "")
+            messages = request_data.get("messages", "")
+            # try casting messages to str and get the first 10000 characters, else mark as None
+            try:
+                messages = str(messages)
+                messages = messages[:10000]
+            except:
+                messages = None
+
+            request_info = f"\nRequest Model: {model}\nMessages: {messages}"
+        else:
+            request_info = ""
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests

             alerting_message = (
                 f"Requests are hanging - {self.alerting_threshold}s+ request time"
             )
             await self.alerting_handler(
-                message=alerting_message
-                + f"\nRequest: {request_data}\nResponse: {response_obj}",
+                message=alerting_message + request_info,
                 level="Medium",
             )

         elif (
             type == "slow_response" and start_time is not None and end_time is not None
         ):
+            slow_message = (
+                f"Responses are slow - {round(end_time-start_time,2)}s response time"
+            )
             if end_time - start_time > self.alerting_threshold:
                 await self.alerting_handler(
-                    message=f"Responses are slow - {round(end_time-start_time,2)}s response time",
+                    message=slow_message + request_info,
                     level="Low",
                 )

From 0f51cd0baba2273a77a6de7334d4998469b9bb8e Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:25:40 -0800
Subject: [PATCH 4/6] (fix) alerting - show timestamps in alert

---
 litellm/_logging.py    | 2 +-
 litellm/proxy/utils.py | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/litellm/_logging.py b/litellm/_logging.py
index d06d8cb6f1..438fa9743d 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -8,7 +8,7 @@
 handler.setLevel(logging.DEBUG)

 # Create a formatter and set it for the handler
 formatter = logging.Formatter(
-    "\033[92m%(asctime)s - %(name)s - %(levelname)s\033[0m: %(message)s",
+    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
     datefmt="%H:%M:%S",
 )
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 0520954972..ebc2dbc054 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -152,6 +152,7 @@ class ProxyLogging:
             request_info = f"\nRequest Model: {model}\nMessages: {messages}"
         else:
             request_info = ""
+
         if type == "hanging_request":
             # Simulate a long-running operation that could take more than 5 minutes
             await asyncio.sleep(
@@ -193,7 +194,13 @@ class ProxyLogging:
             level: str - Low|Medium|High - if calls might fail (Medium) or are failing (High); Currently, no alerts would be 'Low'.
             message: str - what is the alert about
         """
-        formatted_message = f"Level: {level}\n\nMessage: {message}"
+        from datetime import datetime
+
+        # Get the current timestamp
+        current_time = datetime.now().strftime("%H:%M:%S")
+        formatted_message = (
+            f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"
+        )
         if self.alerting is None:
             return

From 2686d1f087f878c7302630fecbda9b5cbb9d5556 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 15:58:07 -0800
Subject: [PATCH 5/6] (fix) only alert users when requests are hanging

---
 litellm/proxy/proxy_server.py |  4 ++++
 litellm/proxy/utils.py        | 19 +++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ca58371f45..d8365404c6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1863,6 +1863,8 @@ async def chat_completion(
         else:  # router is not set
             response = await litellm.acompletion(**data)

+        # Post Call Processing
+        data["litellm_status"] = "success"  # used for alerting
         if hasattr(response, "_hidden_params"):
             model_id = response._hidden_params.get("model_id", None) or ""
         else:
@@ -2048,6 +2050,7 @@ async def embeddings(
             response = await litellm.aembedding(**data)

         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
         end_time = time.time()
         asyncio.create_task(
             proxy_logging_obj.response_taking_too_long(
@@ -2163,6 +2166,7 @@ async def image_generation(
         response = await litellm.aimage_generation(**data)

         ### ALERTING ###
+        data["litellm_status"] = "success"  # used for alerting
         end_time = time.time()
         asyncio.create_task(
             proxy_logging_obj.response_taking_too_long(
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index ebc2dbc054..d638d162d4 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -158,14 +158,17 @@ class ProxyLogging:
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
-
-            alerting_message = (
-                f"Requests are hanging - {self.alerting_threshold}s+ request time"
-            )
-            await self.alerting_handler(
-                message=alerting_message + request_info,
-                level="Medium",
-            )
+            if (
+                request_data is not None
+                and request_data.get("litellm_status", "") != "success"
+            ):
+                alerting_message = (
+                    f"Requests are hanging - {self.alerting_threshold}s+ request time"
+                )
+                await self.alerting_handler(
+                    message=alerting_message + request_info,
+                    level="Medium",
+                )

         elif (
             type == "slow_response" and start_time is not None and end_time is not None

From 2addde9279187ee5208a63e7a4afc42c6b4c683b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 24 Jan 2024 16:07:46 -0800
Subject: [PATCH 6/6] (FIX) improve slack alerting messages

---
 litellm/proxy/proxy_config.yaml | 2 ++
 litellm/proxy/utils.py          | 5 ++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 97168b19f9..b06faac328 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -67,6 +67,8 @@ litellm_settings:

 general_settings:
   master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 10 # sends alerts if requests hang for 10 seconds
   # database_type: "dynamo_db"
   # database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
   #   "billing_mode": "PAY_PER_REQUEST",
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index d638d162d4..94e86600af 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -162,6 +162,7 @@ class ProxyLogging:
                 request_data is not None
                 and request_data.get("litellm_status", "") != "success"
             ):
+                # only alert hanging responses if they have not been marked as success
                 alerting_message = (
                     f"Requests are hanging - {self.alerting_threshold}s+ request time"
                 )
@@ -173,9 +174,7 @@ class ProxyLogging:
         elif (
             type == "slow_response" and start_time is not None and end_time is not None
         ):
-            slow_message = (
-                f"Responses are slow - {round(end_time-start_time,2)}s response time"
-            )
+            slow_message = f"Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s"
             if end_time - start_time > self.alerting_threshold:
                 await self.alerting_handler(
                     message=slow_message + request_info,
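
Note: taken together, these patches implement a simple watchdog pattern. For each request the proxy schedules a background task, sleeps for alerting_threshold seconds, and fires a Slack alert only if the request has not been marked litellm_status == "success" in the meantime; finished-but-slow responses get a separate low-severity alert. The standalone Python sketch below illustrates that pattern under stated assumptions. It is not litellm's actual API: send_slack_alert, hanging_request_watchdog, handle_request and fake_llm_call are illustrative names, and only the SLACK_WEBHOOK_URL environment variable, the "litellm_status" flag, and the alert wording are taken from the patches above.

import asyncio
import os
from datetime import datetime

import aiohttp

# Mirrors alerting_threshold in proxy_config.yaml: seconds a request may hang before we alert.
ALERTING_THRESHOLD = 10


async def send_slack_alert(level: str, message: str) -> None:
    # Format the alert with a timestamp and post it to a Slack incoming webhook,
    # the same way alerting_handler() does in the patches above.
    slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL")
    if slack_webhook_url is None:
        raise Exception("Missing SLACK_WEBHOOK_URL from environment")
    current_time = datetime.now().strftime("%H:%M:%S")
    payload = {"text": f"Level: {level}\nTimestamp: {current_time}\n\nMessage: {message}"}
    headers = {"Content-type": "application/json"}
    async with aiohttp.ClientSession() as session:
        async with session.post(slack_webhook_url, json=payload, headers=headers) as response:
            if response.status != 200:
                print(f"Error sending slack alert: {await response.text()}")


async def hanging_request_watchdog(request_data: dict) -> None:
    # Sleep for the threshold, then alert only if the request was never marked successful.
    await asyncio.sleep(ALERTING_THRESHOLD)
    if request_data.get("litellm_status", "") != "success":
        model = request_data.get("model", "")
        messages = str(request_data.get("messages", ""))[:10000]
        request_info = f"\nRequest Model: {model}\nMessages: {messages}"
        await send_slack_alert(
            level="Medium",
            message=f"Requests are hanging - {ALERTING_THRESHOLD}s+ request time" + request_info,
        )


async def fake_llm_call(request_data: dict) -> None:
    # Stand-in for the real completion call; make this sleep longer than
    # ALERTING_THRESHOLD (and set SLACK_WEBHOOK_URL) to see the alert fire.
    await asyncio.sleep(1)


async def handle_request(request_data: dict) -> None:
    # Schedule the watchdog before doing the work, then mark success so a
    # finished request never produces a hanging-request alert.
    watchdog = asyncio.create_task(hanging_request_watchdog(request_data))
    await fake_llm_call(request_data)
    request_data["litellm_status"] = "success"
    await watchdog  # fire-and-forget in the proxy; awaited here so the demo exits cleanly


if __name__ == "__main__":
    asyncio.run(handle_request({"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]}))

The design choice worth noting is that the watchdog never has to be cancelled: setting litellm_status to "success" on the shared request dict is enough to keep a completed request quiet, so the request hot path needs no extra bookkeeping.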