From 1d5e70f7a00e79e57a59b34168fce90873653c58 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 25 Apr 2024 13:05:34 -0700
Subject: [PATCH 1/2] pass alert type on alerting handle

---
 litellm/integrations/slack_alerting.py | 59 ++++++++++++++++++--------
 litellm/proxy/proxy_server.py          |  4 +-
 litellm/proxy/utils.py                 | 18 ++++++--
 3 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 64f9b5384..b9c05d8bf 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -7,7 +7,7 @@ import copy
 import traceback
 from litellm._logging import verbose_logger, verbose_proxy_logger
 import litellm
-from typing import List, Literal, Any, Union, Optional
+from typing import List, Literal, Any, Union, Optional, Dict
 from litellm.caching import DualCache
 import asyncio
 import aiohttp
@@ -37,12 +37,16 @@ class SlackAlerting:
             "budget_alerts",
             "db_exceptions",
         ],
+        alert_to_webhook_url: Optional[
+            Dict
+        ] = None,  # if user wants to separate alerts to diff channels
     ):
         self.alerting_threshold = alerting_threshold
         self.alerting = alerting
         self.alert_types = alert_types
         self.internal_usage_cache = DualCache()
         self.async_http_handler = AsyncHTTPHandler()
+        self.alert_to_webhook_url = alert_to_webhook_url

         pass

@@ -51,6 +55,7 @@ class SlackAlerting:
         alerting: Optional[List] = None,
         alerting_threshold: Optional[float] = None,
         alert_types: Optional[List] = None,
+        alert_to_webhook_url: Optional[Dict] = None,
     ):
         if alerting is not None:
             self.alerting = alerting
@@ -59,6 +64,13 @@ class SlackAlerting:
         if alert_types is not None:
             self.alert_types = alert_types

+        if alert_to_webhook_url is not None:
+            # update the dict
+            if self.alert_to_webhook_url is None:
+                self.alert_to_webhook_url = alert_to_webhook_url
+            else:
+                self.alert_to_webhook_url.update(alert_to_webhook_url)
+
     async def deployment_in_cooldown(self):
         pass

@@ -171,8 +183,6 @@ class SlackAlerting:
         if self.alerting is None or self.alert_types is None:
             return

-        if "llm_too_slow" not in self.alert_types:
-            return
         time_difference_float, model, api_base, messages = (
             self._response_taking_too_long_callback(
                 kwargs=kwargs,
@@ -205,6 +215,7 @@ class SlackAlerting:
             await self.send_alert(
                 message=slow_message + request_info,
                 level="Low",
+                alert_type="llm_too_slow",
             )

     async def log_failure_event(self, original_exception: Exception):
         pass

@@ -241,9 +252,6 @@ class SlackAlerting:
         request_info = ""

         if type == "hanging_request":
-            # Simulate a long-running operation that could take more than 5 minutes
-            if "llm_requests_hanging" not in self.alert_types:
-                return
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
@@ -291,6 +299,7 @@ class SlackAlerting:
                 await self.send_alert(
                     message=alerting_message + request_info,
                     level="Medium",
+                    alert_type="llm_requests_hanging",
                 )

     async def budget_alerts(
@@ -336,8 +345,7 @@ class SlackAlerting:
             user_info = f"\nUser ID: {user_id}\n Error {error_message}"
             message = "Failed Tracking Cost for" + user_info
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         elif type == "projected_limit_exceeded" and user_info is not None:
@@ -353,8 +361,7 @@ class SlackAlerting:
             """
             message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` {user_info["key_alias"]} \n`Expected Day of Error`: {user_info["projected_exceeded_date"]} \n`Current Spend`: {user_current_spend} \n`Projected Spend at end of month`: {user_info["projected_spend"]} \n`Soft Limit`: {user_max_budget}"""
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         else:
@@ -382,8 +389,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="High",
+                    message=message, level="High", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
             return
@@ -395,8 +401,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=cache_key)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Medium",
+                    message=message, level="Medium", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=cache_key, value="SENT", ttl=2419200)

@@ -409,15 +414,25 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Low",
+                    message=message, level="Low", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
             return
         return

-    async def send_alert(self, message: str, level: Literal["Low", "Medium", "High"]):
+    async def send_alert(
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
+            "db_exceptions",
+        ],
+    ):
         """
         Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298

@@ -453,7 +468,15 @@ class SlackAlerting:
         if _proxy_base_url is not None:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

-        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
+        # check if we find the slack webhook url in self.alert_to_webhook_url
+        if (
+            self.alert_to_webhook_url is not None
+            and alert_type in self.alert_to_webhook_url
+        ):
+            slack_webhook_url = self.alert_to_webhook_url[alert_type]
+        else:
+            slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
+
         if slack_webhook_url is None:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 1a26a09fc..4f5b64772 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -8743,7 +8743,9 @@ async def health_services_endpoint(
         if "slack" in general_settings.get("alerting", []):
             test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
-            await proxy_logging_obj.alerting_handler(message=test_message, level="Low")
+            await proxy_logging_obj.alerting_handler(
+                message=test_message, level="Low", alert_type="budget_alerts"
+            )
             return {
                 "status": "success",
                 "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 5d9940b2d..d483f296d 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -256,7 +256,16 @@ class ProxyLogging:
         )

     async def alerting_handler(
-        self, message: str, level: Literal["Low", "Medium", "High"]
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
"db_exceptions", + ], ): """ Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298 @@ -289,7 +298,7 @@ class ProxyLogging: for client in self.alerting: if client == "slack": await self.slack_alerting_instance.send_alert( - message=message, level=level + message=message, level=level, alert_type=alert_type ) elif client == "sentry": if litellm.utils.sentry_sdk_instance is not None: @@ -323,6 +332,7 @@ class ProxyLogging: self.alerting_handler( message=f"DB read/write call failed: {error_message}", level="High", + alert_type="db_exceptions", ) ) @@ -354,7 +364,9 @@ class ProxyLogging: return asyncio.create_task( self.alerting_handler( - message=f"LLM API call failed: {str(original_exception)}", level="High" + message=f"LLM API call failed: {str(original_exception)}", + level="High", + alert_type="llm_exceptions", ) ) From 2aa849b7aecfee175b400d3b05cbf0666ccfe2c7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 25 Apr 2024 13:06:17 -0700 Subject: [PATCH 2/2] fix test alerting --- litellm/tests/test_alerting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 311b80311..ff3e8f8c7 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -68,6 +68,7 @@ async def test_get_api_base(): await _pl.alerting_handler( message=slow_message + request_info, level="Low", + alert_type="llm_too_slow", ) print("passed test_get_api_base")