Merge pull request #3307 from BerriAI/litellm_set_alerts_per_channel

[Backend-Alerting] Separate alerting for different channels

Commit de6e03f410 · 4 changed files with 60 additions and 22 deletions
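The change in one sentence: `send_alert` and `alerting_handler` now take an explicit `alert_type`, and a new `alert_to_webhook_url` dict lets each alert type post to its own Slack channel. A minimal usage sketch, not taken from this diff; the import path is assumed from the class name in the hunks below, and the webhook URLs are placeholders:

    # Hypothetical wiring; webhook URLs are placeholders and the import
    # path is assumed, not shown in this diff.
    from litellm.integrations.slack_alerting import SlackAlerting

    slack_alerting = SlackAlerting(
        alerting=["slack"],
        alert_types=["llm_too_slow", "budget_alerts"],
        alert_to_webhook_url={
            # any alert type missing from this dict falls back to SLACK_WEBHOOK_URL
            "llm_too_slow": "https://hooks.slack.com/services/T000/B000/slow-channel",
            "budget_alerts": "https://hooks.slack.com/services/T000/B000/billing-channel",
        },
    )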
@@ -7,7 +7,7 @@ import copy
 import traceback
 from litellm._logging import verbose_logger, verbose_proxy_logger
 import litellm
-from typing import List, Literal, Any, Union, Optional
+from typing import List, Literal, Any, Union, Optional, Dict
 from litellm.caching import DualCache
 import asyncio
 import aiohttp
@@ -37,12 +37,16 @@ class SlackAlerting:
             "budget_alerts",
             "db_exceptions",
         ],
+        alert_to_webhook_url: Optional[
+            Dict
+        ] = None,  # if user wants to separate alerts to diff channels
     ):
         self.alerting_threshold = alerting_threshold
         self.alerting = alerting
         self.alert_types = alert_types
         self.internal_usage_cache = DualCache()
         self.async_http_handler = AsyncHTTPHandler()
+        self.alert_to_webhook_url = alert_to_webhook_url
 
         pass
 
@@ -51,6 +55,7 @@ class SlackAlerting:
         alerting: Optional[List] = None,
         alerting_threshold: Optional[float] = None,
         alert_types: Optional[List] = None,
+        alert_to_webhook_url: Optional[Dict] = None,
     ):
         if alerting is not None:
             self.alerting = alerting
@@ -59,6 +64,13 @@ class SlackAlerting:
         if alert_types is not None:
             self.alert_types = alert_types
 
+        if alert_to_webhook_url is not None:
+            # update the dict
+            if self.alert_to_webhook_url is None:
+                self.alert_to_webhook_url = alert_to_webhook_url
+            else:
+                self.alert_to_webhook_url.update(alert_to_webhook_url)
+
     async def deployment_in_cooldown(self):
         pass
 
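Note the merge semantics here: `update_values()` folds the incoming mapping into any existing one rather than replacing it, so reconfiguring one channel keeps the others. A standalone sketch of that behavior (the function name and URLs are invented for illustration):

    from typing import Dict, Optional

    def merge_alert_webhooks(current: Optional[Dict], incoming: Dict) -> Dict:
        # Mirrors the branch above: seed the dict when unset, otherwise merge keys.
        if current is None:
            return incoming
        current.update(incoming)
        return current

    existing = {"budget_alerts": "https://hooks.slack.com/placeholder-budget"}
    merged = merge_alert_webhooks(existing, {"db_exceptions": "https://hooks.slack.com/placeholder-db"})
    print(sorted(merged))  # ['budget_alerts', 'db_exceptions'] - the old entry survives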
@@ -171,8 +183,6 @@ class SlackAlerting:
         if self.alerting is None or self.alert_types is None:
             return
 
-        if "llm_too_slow" not in self.alert_types:
-            return
         time_difference_float, model, api_base, messages = (
             self._response_taking_too_long_callback(
                 kwargs=kwargs,
@@ -205,6 +215,7 @@ class SlackAlerting:
             await self.send_alert(
                 message=slow_message + request_info,
                 level="Low",
+                alert_type="llm_too_slow",
             )
 
     async def log_failure_event(self, original_exception: Exception):
@@ -241,9 +252,6 @@ class SlackAlerting:
         request_info = ""
 
         if type == "hanging_request":
-            # Simulate a long-running operation that could take more than 5 minutes
-            if "llm_requests_hanging" not in self.alert_types:
-                return
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
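For context on the sleep above: the hanging-request check waits out `alerting_threshold` and then lets the surrounding code (not all of it visible in this hunk) decide whether the request ever completed. The shape of that pattern, with an illustrative `is_done` callback standing in for the real bookkeeping:

    import asyncio
    from typing import Callable

    async def hanging_request_watchdog(
        alerting_threshold: float, is_done: Callable[[], bool]
    ) -> None:
        # Sleep for the threshold, then alert if the request still isn't done.
        await asyncio.sleep(alerting_threshold)
        if not is_done():
            print("llm_requests_hanging alert would fire here")

    asyncio.run(hanging_request_watchdog(0.1, lambda: False))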
@@ -291,6 +299,7 @@ class SlackAlerting:
             await self.send_alert(
                 message=alerting_message + request_info,
                 level="Medium",
+                alert_type="llm_requests_hanging",
             )
 
     async def budget_alerts(
@@ -336,8 +345,7 @@ class SlackAlerting:
             user_info = f"\nUser ID: {user_id}\n Error {error_message}"
             message = "Failed Tracking Cost for" + user_info
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         elif type == "projected_limit_exceeded" and user_info is not None:
@@ -353,8 +361,7 @@ class SlackAlerting:
             """
             message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` {user_info["key_alias"]} \n`Expected Day of Error`: {user_info["projected_exceeded_date"]} \n`Current Spend`: {user_current_spend} \n`Projected Spend at end of month`: {user_info["projected_spend"]} \n`Soft Limit`: {user_max_budget}"""
             await self.send_alert(
-                message=message,
-                level="High",
+                message=message, level="High", alert_type="budget_alerts"
             )
             return
         else:
@@ -382,8 +389,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="High",
+                    message=message, level="High", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
             return
@@ -395,8 +401,7 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=cache_key)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Medium",
+                    message=message, level="Medium", alert_type="budget_alerts"
                 )
 
                 await _cache.async_set_cache(key=cache_key, value="SENT", ttl=2419200)
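A small review note on the `ttl=2419200` used to dedupe these budget alerts in the cache: it is 28 days in seconds, so a given alert message is suppressed for roughly a month after it first fires.

    # the dedup window used for budget alerts, expressed in seconds
    ttl = 28 * 24 * 60 * 60
    assert ttl == 2419200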
@@ -409,15 +414,25 @@ class SlackAlerting:
             result = await _cache.async_get_cache(key=message)
             if result is None:
                 await self.send_alert(
-                    message=message,
-                    level="Low",
+                    message=message, level="Low", alert_type="budget_alerts"
                 )
                 await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
             return
 
         return
 
-    async def send_alert(self, message: str, level: Literal["Low", "Medium", "High"]):
+    async def send_alert(
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
+            "db_exceptions",
+        ],
+    ):
         """
         Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298
 
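The five-member `Literal` added to `send_alert` here is duplicated verbatim in `ProxyLogging.alerting_handler` further down. A shared alias would keep the two signatures from drifting; a sketch, with the `AlertType` name invented for illustration (the diff inlines the Literal in both places):

    from typing import Literal

    # Hypothetical shared alias; not part of this diff.
    AlertType = Literal[
        "llm_exceptions",
        "llm_too_slow",
        "llm_requests_hanging",
        "budget_alerts",
        "db_exceptions",
    ]

    def route(alert_type: AlertType) -> None:
        # a static type checker rejects call sites outside the Literal set
        print(f"routing alert_type={alert_type}")

    route("db_exceptions")  # type-checks; route("spend_reports") would not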
@@ -453,7 +468,15 @@ class SlackAlerting:
         if _proxy_base_url is not None:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
 
-        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
+        # check if we find the slack webhook url in self.alert_to_webhook_url
+        if (
+            self.alert_to_webhook_url is not None
+            and alert_type in self.alert_to_webhook_url
+        ):
+            slack_webhook_url = self.alert_to_webhook_url[alert_type]
+        else:
+            slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
+
         if slack_webhook_url is None:
             raise Exception("Missing SLACK_WEBHOOK_URL from environment")
         payload = {"text": formatted_message}
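This hunk is the heart of the feature: webhook selection at send time. Extracted into a standalone function for review purposes (same logic, minus the class state):

    import os
    from typing import Dict, Optional

    def resolve_slack_webhook(
        alert_to_webhook_url: Optional[Dict], alert_type: str
    ) -> str:
        # Prefer the per-alert-type mapping; fall back to the SLACK_WEBHOOK_URL
        # environment variable, exactly as the new send_alert branch does.
        if alert_to_webhook_url is not None and alert_type in alert_to_webhook_url:
            return alert_to_webhook_url[alert_type]
        slack_webhook_url = os.getenv("SLACK_WEBHOOK_URL", None)
        if slack_webhook_url is None:
            raise Exception("Missing SLACK_WEBHOOK_URL from environment")
        return slack_webhook_url

    print(resolve_slack_webhook({"db_exceptions": "https://hooks.slack.com/placeholder"}, "db_exceptions"))

One consequence worth noting: when an alert type is mapped, the environment variable is never consulted for it, so a bad per-type URL surfaces at HTTP send time rather than as the missing-webhook exception.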
@@ -8743,7 +8743,9 @@ async def health_services_endpoint(
 
     if "slack" in general_settings.get("alerting", []):
         test_message = f"""\n🚨 `ProjectedLimitExceededError` 💸\n\n`Key Alias:` litellm-ui-test-alert \n`Expected Day of Error`: 28th March \n`Current Spend`: $100.00 \n`Projected Spend at end of month`: $1000.00 \n`Soft Limit`: $700"""
-        await proxy_logging_obj.alerting_handler(message=test_message, level="Low")
+        await proxy_logging_obj.alerting_handler(
+            message=test_message, level="Low", alert_type="budget_alerts"
+        )
     return {
         "status": "success",
         "message": "Mock Slack Alert sent, verify Slack Alert Received on your channel",
@@ -256,7 +256,16 @@ class ProxyLogging:
         )
 
     async def alerting_handler(
-        self, message: str, level: Literal["Low", "Medium", "High"]
+        self,
+        message: str,
+        level: Literal["Low", "Medium", "High"],
+        alert_type: Literal[
+            "llm_exceptions",
+            "llm_too_slow",
+            "llm_requests_hanging",
+            "budget_alerts",
+            "db_exceptions",
+        ],
     ):
         """
         Alerting based on thresholds: - https://github.com/BerriAI/litellm/issues/1298
@@ -289,7 +298,7 @@ class ProxyLogging:
         for client in self.alerting:
             if client == "slack":
                 await self.slack_alerting_instance.send_alert(
-                    message=message, level=level
+                    message=message, level=level, alert_type=alert_type
                 )
             elif client == "sentry":
                 if litellm.utils.sentry_sdk_instance is not None:
@@ -323,6 +332,7 @@ class ProxyLogging:
                 self.alerting_handler(
                     message=f"DB read/write call failed: {error_message}",
                     level="High",
+                    alert_type="db_exceptions",
                 )
             )
 
@@ -354,7 +364,9 @@ class ProxyLogging:
             return
         asyncio.create_task(
             self.alerting_handler(
-                message=f"LLM API call failed: {str(original_exception)}", level="High"
+                message=f"LLM API call failed: {str(original_exception)}",
+                level="High",
+                alert_type="llm_exceptions",
             )
         )
 
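Taken together, the `ProxyLogging` hunks thread `alert_type` from each call site (DB failures tagged `db_exceptions`, LLM API failures tagged `llm_exceptions`) through `alerting_handler` into the Slack client. A toy stand-in for the dispatch loop, with prints in place of the real senders:

    import asyncio
    from typing import List

    async def alerting_handler_sketch(
        alerting: List[str], message: str, level: str, alert_type: str
    ) -> None:
        # Stand-in for ProxyLogging.alerting_handler: every configured client
        # receives the alert, but only the Slack path routes on alert_type.
        for client in alerting:
            if client == "slack":
                print(f"[slack:{alert_type}] {level}: {message}")
            elif client == "sentry":
                print(f"[sentry] {level}: {message}")

    asyncio.run(
        alerting_handler_sketch(
            ["slack"], "DB read/write call failed: ...", "High", "db_exceptions"
        )
    )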
@@ -68,6 +68,7 @@ async def test_get_api_base():
     await _pl.alerting_handler(
         message=slow_message + request_info,
         level="Low",
+        alert_type="llm_too_slow",
     )
     print("passed test_get_api_base")
 