fix(slack_alerting.py): allow new 'alerting_metadata' arg

Allows the user to pass additional alerting metadata for debugging; the metadata is appended to the Slack alert message (see the usage sketch below).
Krrish Dholakia 2024-06-14 16:06:47 -07:00
parent f1bff50d60
commit 1cce99300f
4 changed files with 85 additions and 13 deletions
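A minimal caller-side sketch (not part of this commit): the new test passes the metadata under litellm_params["metadata"]["alerting_metadata"], and the `metadata` kwarg of `litellm.completion` is assumed to land in that same spot, so a request could attach alerting metadata like this:

    # Hypothetical caller-side sketch: attach an "alerting_metadata" dict to a
    # request so it is appended to any Slack alert raised for that request.
    import litellm

    response = litellm.completion(
        model="gpt-3.5-turbo",  # illustrative model name; requires a valid API key at runtime
        messages=[{"role": "user", "content": "hi"}],
        # assumption: this `metadata` kwarg is forwarded into
        # kwargs["litellm_params"]["metadata"], which is where the updated
        # SlackAlerting callbacks look for "alerting_metadata"
        metadata={"alerting_metadata": {"env": "staging", "owner": "team-a"}},
    )

For proxy-level alerts, `ProxyLogging.alerting_handler` (changed file 3 below) reads the same `alerting_metadata` key from `request_data["metadata"]`.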

Changed file 1 of 4: slack_alerting.py

@@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
             messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
         request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
         slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+        alerting_metadata: dict = {}
         if time_difference_float > self.alerting_threshold:
             # add deployment latencies to alert
             if (
@@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
                 and "litellm_params" in kwargs
                 and "metadata" in kwargs["litellm_params"]
             ):
-                _metadata = kwargs["litellm_params"]["metadata"]
+                _metadata: dict = kwargs["litellm_params"]["metadata"]
                 request_info = litellm.utils._add_key_name_and_team_to_alert(
                     request_info=request_info, metadata=_metadata
                 )
@@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
                 request_info += (
                     f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
                 )
+
+                if "alerting_metadata" in _metadata:
+                    alerting_metadata = _metadata["alerting_metadata"]
             await self.send_alert(
                 message=slow_message + request_info,
                 level="Low",
                 alert_type="llm_too_slow",
+                alerting_metadata=alerting_metadata,
             )

     async def async_update_daily_reports(
@@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
         message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"

         # send alert
-        await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+        await self.send_alert(
+            message=message,
+            level="Low",
+            alert_type="daily_reports",
+            alerting_metadata={},
+        )

         return True
@@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
             await asyncio.sleep(
                 self.alerting_threshold
             )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
+            alerting_metadata: dict = {}
             if (
                 request_data is not None
                 and request_data.get("litellm_status", "") != "success"
@@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
                 ):
                     # In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data``
                     # in that case we fallback to the api base set in the request metadata
-                    _metadata = request_data["metadata"]
+                    _metadata: dict = request_data["metadata"]
                     _api_base = _metadata.get("api_base", "")

                     request_info = litellm.utils._add_key_name_and_team_to_alert(
@@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):
                     if _api_base is None:
                         _api_base = ""

+                    if "alerting_metadata" in _metadata:
+                        alerting_metadata = _metadata["alerting_metadata"]
+
                     request_info += f"\nAPI Base: `{_api_base}`"
                 # only alert hanging responses if they have not been marked as success
                 alerting_message = (
@@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
                         message=alerting_message + request_info,
                         level="Medium",
                         alert_type="llm_requests_hanging",
+                        alerting_metadata=alerting_metadata,
                     )

     async def failed_tracking_alert(self, error_message: str):
@@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
         result = await _cache.async_get_cache(key=_cache_key)
         if result is None:
             await self.send_alert(
-                message=message, level="High", alert_type="budget_alerts"
+                message=message,
+                level="High",
+                alert_type="budget_alerts",
+                alerting_metadata={},
             )
             await _cache.async_set_cache(
                 key=_cache_key,
@@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
                     level="High",
                     alert_type="budget_alerts",
                     user_info=webhook_event,
+                    alerting_metadata={},
                 )
                 await _cache.async_set_cache(
                     key=_cache_key,
@@ -941,7 +960,10 @@ class SlackAlerting(CustomLogger):
                     )
                     # send minor alert
                     await self.send_alert(
-                        message=msg, level="Medium", alert_type="outage_alerts"
+                        message=msg,
+                        level="Medium",
+                        alert_type="outage_alerts",
+                        alerting_metadata={},
                     )
                     # set to true
                     outage_value["minor_alert_sent"] = True
@@ -963,7 +985,12 @@ class SlackAlerting(CustomLogger):
                     )

                     # send minor alert
-                    await self.send_alert(message=msg, level="High", alert_type="outage_alerts")
+                    await self.send_alert(
+                        message=msg,
+                        level="High",
+                        alert_type="outage_alerts",
+                        alerting_metadata={},
+                    )
                     # set to true
                     outage_value["major_alert_sent"] = True
@@ -1062,7 +1089,10 @@ class SlackAlerting(CustomLogger):
                 )
                 # send minor alert
                 await self.send_alert(
-                    message=msg, level="Medium", alert_type="outage_alerts"
+                    message=msg,
+                    level="Medium",
+                    alert_type="outage_alerts",
+                    alerting_metadata={},
                 )
                 # set to true
                 outage_value["minor_alert_sent"] = True
@@ -1081,7 +1111,10 @@ class SlackAlerting(CustomLogger):
                 )
                 # send minor alert
                 await self.send_alert(
-                    message=msg, level="High", alert_type="outage_alerts"
+                    message=msg,
+                    level="High",
+                    alert_type="outage_alerts",
+                    alerting_metadata={},
                 )
                 # set to true
                 outage_value["major_alert_sent"] = True
@@ -1143,7 +1176,10 @@ Model Info:
 """

         alert_val = self.send_alert(
-            message=message, level="Low", alert_type="new_model_added"
+            message=message,
+            level="Low",
+            alert_type="new_model_added",
+            alerting_metadata={},
         )

         if alert_val is not None and asyncio.iscoroutine(alert_val):
@@ -1368,6 +1404,7 @@ Model Info:
         message: str,
         level: Literal["Low", "Medium", "High"],
         alert_type: Literal[AlertType],
+        alerting_metadata: dict,
         user_info: Optional[WebhookEvent] = None,
         **kwargs,
     ):
@@ -1425,6 +1462,9 @@ Model Info:
         if kwargs:
             for key, value in kwargs.items():
                 formatted_message += f"\n\n{key}: `{value}`\n\n"
+        if alerting_metadata:
+            for key, value in alerting_metadata.items():
+                formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"

         if _proxy_base_url is not None:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
@@ -1622,6 +1662,7 @@ Model Info:
                 message=_weekly_spend_message,
                 level="Low",
                 alert_type="spend_reports",
+                alerting_metadata={},
             )
         except Exception as e:
             verbose_proxy_logger.error("Error sending weekly spend report", e)
@@ -1673,6 +1714,7 @@ Model Info:
                 message=_spend_message,
                 level="Low",
                 alert_type="spend_reports",
+                alerting_metadata={},
             )
         except Exception as e:
             verbose_proxy_logger.error("Error sending weekly spend report", e)
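The user-visible effect of the new argument is the loop added in send_alert (the *Alerting Metadata* block). A standalone sketch of just that formatting step; the helper name here is hypothetical and not part of the commit:

    # Standalone sketch mirroring the loop added to send_alert above;
    # format_alerting_metadata is an illustrative name only.
    def format_alerting_metadata(formatted_message: str, alerting_metadata: dict) -> str:
        if alerting_metadata:
            for key, value in alerting_metadata.items():
                formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
        return formatted_message

    print(format_alerting_metadata("`Responses are slow ...`", {"env": "staging"}))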

Changed file 2 of 4: proxy config (YAML)

@@ -79,8 +79,8 @@ litellm_settings:
   failure_callback: ["langfuse"]
   cache: true

-# general_settings:
-# alerting: ["email"]
+general_settings:
+  alerting: ["slack"]
 # key_management_system: "aws_kms"
 # key_management_settings:
 # hosted_keys: ["LITELLM_MASTER_KEY"]

Changed file 3 of 4: ProxyLogging (proxy utils)

@@ -455,6 +455,7 @@ class ProxyLogging:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

         extra_kwargs = {}
+        alerting_metadata = {}
         if request_data is not None:
             _url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
                 request_data=request_data
@@ -462,7 +463,12 @@ class ProxyLogging:
             if _url is not None:
                 extra_kwargs["🪢 Langfuse Trace"] = _url
                 formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)
+            if (
+                "metadata" in request_data
+                and request_data["metadata"].get("alerting_metadata", None) is not None
+                and isinstance(request_data["metadata"]["alerting_metadata"], dict)
+            ):
+                alerting_metadata = request_data["metadata"]["alerting_metadata"]

         for client in self.alerting:
             if client == "slack":
                 await self.slack_alerting_instance.send_alert(
@@ -470,6 +476,7 @@ class ProxyLogging:
                     level=level,
                     alert_type=alert_type,
                     user_info=None,
+                    alerting_metadata=alerting_metadata,
                     **extra_kwargs,
                 )
             elif client == "sentry":
@@ -510,7 +517,7 @@ class ProxyLogging:
             )

         if hasattr(self, "service_logging_obj"):
-            self.service_logging_obj.async_service_failure_hook(
+            await self.service_logging_obj.async_service_failure_hook(
                 service=ServiceTypes.DB,
                 duration=duration,
                 error=error_message,

Changed file 4 of 4: alerting tests

@@ -159,6 +159,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
         mock_send_alert.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_alerting_metadata(slack_alerting):
+    """
+    Test alerting_metadata is propagated correctly for response taking too long
+    """
+    start_time = datetime.now()
+    end_time = start_time + timedelta(seconds=301)
+    kwargs = {
+        "model": "test_model",
+        "messages": "test_messages",
+        "litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
+    }
+    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
+        ## RESPONSE TAKING TOO LONG
+        await slack_alerting.response_taking_too_long_callback(
+            kwargs, None, start_time, end_time
+        )
+
+        mock_send_alert.assert_awaited_once()
+
+        assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]
+
+
 # Test for budget crossed
 @pytest.mark.asyncio
 async def test_budget_alerts_crossed(slack_alerting):