pass alert type on alerting handler

Ishaan Jaff 2024-04-25 13:05:34 -07:00
parent 6337f5abd5
commit 1d5e70f7a0
3 changed files with 59 additions and 22 deletions
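
In short: this change threads an explicit alert_type through every alerting call so Slack alerts can be routed to per-type webhooks via a new alert_to_webhook_url mapping, falling back to the global SLACK_WEBHOOK_URL. A sketch of the mapping shape (URLs hypothetical; keys taken from the Literal in the diff below):

# Hypothetical webhook URLs; the keys match the alert_type Literal introduced in this commit.
alert_to_webhook_url = {
    "llm_exceptions": "https://hooks.slack.com/services/T000/B000/AAA",
    "llm_too_slow": "https://hooks.slack.com/services/T000/B000/BBB",
    "llm_requests_hanging": "https://hooks.slack.com/services/T000/B000/CCC",
    "budget_alerts": "https://hooks.slack.com/services/T000/B000/DDD",
    "db_exceptions": "https://hooks.slack.com/services/T000/B000/EEE",
}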


@@ -7,7 +7,7 @@ import copy
 import traceback
 from litellm._logging import verbose_logger, verbose_proxy_logger
 import litellm
-from typing import List, Literal, Any, Union, Optional
+from typing import List, Literal, Any, Union, Optional, Dict
 from litellm.caching import DualCache
 import asyncio
 import aiohttp
@@ -37,12 +37,16 @@ class SlackAlerting:
             "budget_alerts",
             "db_exceptions",
         ],
+        alert_to_webhook_url: Optional[
+            Dict
+        ] = None,  # if user wants to separate alerts to diff channels
     ):
         self.alerting_threshold = alerting_threshold
         self.alerting = alerting
         self.alert_types = alert_types
         self.internal_usage_cache = DualCache()
         self.async_http_handler = AsyncHTTPHandler()
+        self.alert_to_webhook_url = alert_to_webhook_url
         pass
@@ -51,6 +55,7 @@ class SlackAlerting:
         alerting: Optional[List] = None,
         alerting_threshold: Optional[float] = None,
         alert_types: Optional[List] = None,
+        alert_to_webhook_url: Optional[Dict] = None,
     ):
         if alerting is not None:
             self.alerting = alerting
@@ -59,6 +64,13 @@ class SlackAlerting:
         if alert_types is not None:
             self.alert_types = alert_types
+        if alert_to_webhook_url is not None:
+            # update the dict
+            if self.alert_to_webhook_url is None:
+                self.alert_to_webhook_url = alert_to_webhook_url
+            else:
+                self.alert_to_webhook_url.update(alert_to_webhook_url)
 
     async def deployment_in_cooldown(self):
         pass
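
The merge above behaves like a plain dict.update(): a later update_values() call overrides colliding alert types and keeps the rest. A minimal standalone sketch (URLs hypothetical):

# Hypothetical URLs; mirrors the None-or-update branch above.
existing = {"budget_alerts": "https://hooks.slack.com/services/AAA"}
incoming = {
    "budget_alerts": "https://hooks.slack.com/services/NEW",
    "llm_too_slow": "https://hooks.slack.com/services/BBB",
}
existing.update(incoming)
# budget_alerts is re-routed, llm_too_slow is added, nothing else is wiped:
assert existing == {
    "budget_alerts": "https://hooks.slack.com/services/NEW",
    "llm_too_slow": "https://hooks.slack.com/services/BBB",
}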
@@ -171,8 +183,6 @@ class SlackAlerting:
         if self.alerting is None or self.alert_types is None:
             return
-        if "llm_too_slow" not in self.alert_types:
-            return
         time_difference_float, model, api_base, messages = (
             self._response_taking_too_long_callback(
                 kwargs=kwargs,
@@ -205,6 +215,7 @@ class SlackAlerting:
             await self.send_alert(
                 message=slow_message + request_info,
                 level="Low",
+                alert_type="llm_too_slow",
             )
 
     async def log_failure_event(self, original_exception: Exception):
@@ -241,9 +252,6 @@ class SlackAlerting:
         request_info = ""
         if type == "hanging_request":
-            # Simulate a long-running operation that could take more than 5 minutes
-            if "llm_requests_hanging" not in self.alert_types:
-                return
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
@@ -291,6 +299,7 @@ class SlackAlerting:
             await self.send_alert(
                 message=alerting_message + request_info,
                 level="Medium",
+                alert_type="llm_requests_hanging",
             )
 
     async def budget_alerts(
@@ -336,8 +345,7 @@ class SlackAlerting:
             user_info = f"\nUser ID: {user_id}\n Error {error_message}"
             message = "Failed Tracking Cost for" + user_info
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         elif type == "projected_limit_exceeded" and user_info is not None:
@@ -353,8 +361,7 @@ class SlackAlerting:
             """
             message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` {user_info["key_alias"]} \n`Expected Day of Error`: {user_info["projected_exceeded_date"]} \n`Current Spend`: {user_current_spend} \n`Projected Spend at end of month`: {user_info["projected_spend"]} \n`Soft Limit`: {user_max_budget}"""
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         else:
@@ -382,8 +389,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="High",
+                    message=message, level="High", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
             return
@@ -395,8 +401,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=cache_key)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Medium",
+                    message=message, level="Medium", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=cache_key, value="SENT", ttl=2419200)
@@ -409,15 +414,25 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Low",
+                    message=message, level="Low", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
                 return
         return
 
-    async def send_alert(self, message: str, level: Literal["Low", "Medium", "High"]):
+    async def send_alert(
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
+            "db_exceptions",
+        ],
+    ):
         """
         Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298
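
For context, a call under the new signature might look like this; the import path and constructor arguments are assumptions, not part of this diff:

import asyncio

from litellm.integrations.slack_alerting import SlackAlerting  # assumed module path

async def main():
    alerting = SlackAlerting(
        alert_to_webhook_url={  # per-type routing, as introduced above
            "llm_too_slow": "https://hooks.slack.com/services/XXX"  # hypothetical URL
        },
    )
    await alerting.send_alert(
        message="Responses are slower than normal",
        level="Low",
        alert_type="llm_too_slow",  # must be one of the five Literal values
    )

asyncio.run(main())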
@@ -453,7 +468,15 @@ class SlackAlerting:
         if _proxy_base_url is not None:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
 
-        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
+        # check if we find the slack webhook url in self.alert_to_webhook_url
+        if (
+            self.alert_to_webhook_url is not None
+            and alert_type in self.alert_to_webhook_url
+        ):
+            slack_webhook_url = self.alert_to_webhook_url[alert_type]
+        else:
+            slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
 
         if slack_webhook_url is None:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
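
The lookup above prefers the per-type mapping and only then falls back to the SLACK_WEBHOOK_URL env var. The same resolution order as a standalone helper (hypothetical, for illustration only):

import os
from typing import Dict, Optional

def resolve_webhook_url(alert_type: str, alert_to_webhook_url: Optional[Dict] = None) -> str:
    # Hypothetical helper mirroring send_alert's lookup order:
    # per-alert-type mapping first, then the global env var.
    if alert_to_webhook_url is not None and alert_type in alert_to_webhook_url:
        return alert_to_webhook_url[alert_type]
    url = os.getenv("SLACK_WEBHOOK_URL", None)
    if url is None:
        raise Exception("Missing SLACK_WEBHOOK_URL from environment")
    return url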


@@ -8743,7 +8743,9 @@ async def health_services_endpoint(
     if "slack" in general_settings.get("alerting", []):
         test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
-        await proxy_logging_obj.alerting_handler(message=test_message, level="Low")
+        await proxy_logging_obj.alerting_handler(
+            message=test_message, level="Low", alert_type="budget_alerts"
+        )
     return {
         "status": "success",
         "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",


@@ -256,7 +256,16 @@ class ProxyLogging:
         )
 
     async def alerting_handler(
-        self, message: str, level: Literal["Low", "Medium", "High"]
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
+            "db_exceptions",
+        ],
     ):
         """
         Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298
@@ -289,7 +298,7 @@ class ProxyLogging:
         for client in self.alerting:
             if client == "slack":
                 await self.slack_alerting_instance.send_alert(
-                    message=message, level=level
+                    message=message, level=level, alert_type=alert_type
                 )
             elif client == "sentry":
                 if litellm.utils.sentry_sdk_instance is not None:
@@ -323,6 +332,7 @@ class ProxyLogging:
             self.alerting_handler(
                 message=f"DB read/write call failed: {error_message}",
                 level="High",
+                alert_type="db_exceptions",
             )
         )
@@ -354,7 +364,9 @@ class ProxyLogging:
             return
         asyncio.create_task(
             self.alerting_handler(
-                message=f"LLM API call failed: {str(original_exception)}", level="High"
+                message=f"LLM API call failed: {str(original_exception)}",
+                level="High",
+                alert_type="llm_exceptions",
             )
         )
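
Both failure paths dispatch via asyncio.create_task so the request path never blocks on Slack. A self-contained sketch of that pattern with a stand-in handler (names hypothetical):

import asyncio

async def alerting_handler(message: str, level: str, alert_type: str) -> None:
    # Stand-in for ProxyLogging.alerting_handler: simulate posting to Slack.
    await asyncio.sleep(0.1)
    print(f"[{level}] {alert_type}: {message}")

async def on_llm_failure(exc: Exception) -> None:
    # Fire-and-forget, mirroring the hunk above: the caller returns
    # immediately while the alert is sent in the background.
    asyncio.create_task(
        alerting_handler(
            message=f"LLM API call failed: {exc}",
            level="High",
            alert_type="llm_exceptions",
        )
    )
    await asyncio.sleep(0.2)  # keep the loop alive long enough for the task

asyncio.run(on_llm_failure(RuntimeError("request timed out")))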