diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 21415fb6d6..2c558af07b 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
             messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
         request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
         slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+        alerting_metadata: dict = {}
         if time_difference_float > self.alerting_threshold:
             # add deployment latencies to alert
             if (
@@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
                 and "litellm_params" in kwargs
                 and "metadata" in kwargs["litellm_params"]
             ):
-                _metadata = kwargs["litellm_params"]["metadata"]
+                _metadata: dict = kwargs["litellm_params"]["metadata"]
                 request_info = litellm.utils._add_key_name_and_team_to_alert(
                     request_info=request_info, metadata=_metadata
                 )
@@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
                     request_info += (
                         f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
                     )
+
+                if "alerting_metadata" in _metadata:
+                    alerting_metadata = _metadata["alerting_metadata"]
             await self.send_alert(
                 message=slow_message + request_info,
                 level="Low",
                 alert_type="llm_too_slow",
+                alerting_metadata=alerting_metadata,
             )

     async def async_update_daily_reports(
@@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
             message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"

         # send alert
-        await self.send_alert(message=message, level="Low", alert_type="daily_reports")
+        await self.send_alert(
+            message=message,
+            level="Low",
+            alert_type="daily_reports",
+            alerting_metadata={},
+        )

         return True

@@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
         await asyncio.sleep(
             self.alerting_threshold
         )  # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
+        alerting_metadata: dict = {}
         if (
             request_data is not None
             and request_data.get("litellm_status", "") != "success"
@@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
                 ):
                     # In hanging requests sometime it has not made it to the point where the deployment is passed to the `request_data``
                     # in that case we fallback to the api base set in the request metadata
-                    _metadata = request_data["metadata"]
+                    _metadata: dict = request_data["metadata"]
                     _api_base = _metadata.get("api_base", "")

                     request_info = litellm.utils._add_key_name_and_team_to_alert(
@@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):

                     if _api_base is None:
                         _api_base = ""
+
+                    if "alerting_metadata" in _metadata:
+                        alerting_metadata = _metadata["alerting_metadata"]
                     request_info += f"\nAPI Base: `{_api_base}`"
                 # only alert hanging responses if they have not been marked as success
                 alerting_message = (
@@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
                     message=alerting_message + request_info,
                     level="Medium",
                     alert_type="llm_requests_hanging",
+                    alerting_metadata=alerting_metadata,
                 )

     async def failed_tracking_alert(self, error_message: str):
@@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
         result = await _cache.async_get_cache(key=_cache_key)
         if result is None:
             await self.send_alert(
-                message=message, level="High", alert_type="budget_alerts"
+                message=message,
+                level="High",
+                alert_type="budget_alerts",
+                alerting_metadata={},
             )
             await _cache.async_set_cache(
                 key=_cache_key,
@@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
                 level="High",
                 alert_type="budget_alerts",
                 user_info=webhook_event,
+                alerting_metadata={},
             )
             await _cache.async_set_cache(
                 key=_cache_key,
@@ -941,7 +960,10 @@ class SlackAlerting(CustomLogger):
                     )
                     # send minor alert
                     await self.send_alert(
-                        message=msg, level="Medium", alert_type="outage_alerts"
+                        message=msg,
+                        level="Medium",
+                        alert_type="outage_alerts",
+                        alerting_metadata={},
                     )
                     # set to true
                     outage_value["minor_alert_sent"] = True
@@ -963,7 +985,12 @@ class SlackAlerting(CustomLogger):
                     )

                     # send minor alert
-                    await self.send_alert(message=msg, level="High", alert_type="outage_alerts")
+                    await self.send_alert(
+                        message=msg,
+                        level="High",
+                        alert_type="outage_alerts",
+                        alerting_metadata={},
+                    )
                     # set to true
                     outage_value["major_alert_sent"] = True
@@ -1062,7 +1089,10 @@ class SlackAlerting(CustomLogger):
                 )
                 # send minor alert
                 await self.send_alert(
-                    message=msg, level="Medium", alert_type="outage_alerts"
+                    message=msg,
+                    level="Medium",
+                    alert_type="outage_alerts",
+                    alerting_metadata={},
                 )
                 # set to true
                 outage_value["minor_alert_sent"] = True
@@ -1081,7 +1111,10 @@ class SlackAlerting(CustomLogger):
                 )
                 # send minor alert
                 await self.send_alert(
-                    message=msg, level="High", alert_type="outage_alerts"
+                    message=msg,
+                    level="High",
+                    alert_type="outage_alerts",
+                    alerting_metadata={},
                 )
                 # set to true
                 outage_value["major_alert_sent"] = True
@@ -1143,7 +1176,10 @@ Model Info:
 """

         alert_val = self.send_alert(
-            message=message, level="Low", alert_type="new_model_added"
+            message=message,
+            level="Low",
+            alert_type="new_model_added",
+            alerting_metadata={},
         )

         if alert_val is not None and asyncio.iscoroutine(alert_val):
@@ -1368,6 +1404,7 @@ Model Info:
         message: str,
         level: Literal["Low", "Medium", "High"],
         alert_type: Literal[AlertType],
+        alerting_metadata: dict,
         user_info: Optional[WebhookEvent] = None,
         **kwargs,
     ):
@@ -1425,6 +1462,9 @@ Model Info:
         if kwargs:
             for key, value in kwargs.items():
                 formatted_message += f"\n\n{key}: `{value}`\n\n"
+        if alerting_metadata:
+            for key, value in alerting_metadata.items():
+                formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"

         if _proxy_base_url is not None:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
@@ -1622,6 +1662,7 @@ Model Info:
                 message=_weekly_spend_message,
                 level="Low",
                 alert_type="spend_reports",
+                alerting_metadata={},
             )
         except Exception as e:
             verbose_proxy_logger.error("Error sending weekly spend report", e)
@@ -1673,6 +1714,7 @@ Model Info:
                 message=_spend_message,
                 level="Low",
                 alert_type="spend_reports",
+                alerting_metadata={},
             )
         except Exception as e:
             verbose_proxy_logger.error("Error sending weekly spend report", e)
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 5504d7a61c..15b8bc93e6 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -79,8 +79,8 @@ litellm_settings:
   failure_callback: ["langfuse"]
   cache: true

-# general_settings:
-#   alerting: ["email"]
+general_settings:
+  alerting: ["slack"]
 #   key_management_system: "aws_kms"
 #   key_management_settings:
 #     hosted_keys: ["LITELLM_MASTER_KEY"]
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index ebe30789d7..7741e5382a 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -455,6 +455,7 @@ class ProxyLogging:
             formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"

         extra_kwargs = {}
+        alerting_metadata = {}
         if request_data is not None:
             _url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
                 request_data=request_data
@@ -462,7 +463,12 @@
             if _url is not None:
                 extra_kwargs["🪢 Langfuse Trace"] = _url
                 formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)
-
+            if (
+                "metadata" in request_data
+                and request_data["metadata"].get("alerting_metadata", None) is not None
+                and isinstance(request_data["metadata"]["alerting_metadata"], dict)
+            ):
+                alerting_metadata = request_data["metadata"]["alerting_metadata"]
         for client in self.alerting:
             if client == "slack":
                 await self.slack_alerting_instance.send_alert(
@@ -470,6 +476,7 @@
                     level=level,
                     alert_type=alert_type,
                     user_info=None,
+                    alerting_metadata=alerting_metadata,
                     **extra_kwargs,
                 )
             elif client == "sentry":
@@ -510,7 +517,7 @@
             )

         if hasattr(self, "service_logging_obj"):
-            self.service_logging_obj.async_service_failure_hook(
+            await self.service_logging_obj.async_service_failure_hook(
                 service=ServiceTypes.DB,
                 duration=duration,
                 error=error_message,
diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py
index 9dfec3dcfa..47d9ddefc8 100644
--- a/litellm/tests/test_alerting.py
+++ b/litellm/tests/test_alerting.py
@@ -159,6 +159,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
         mock_send_alert.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_alerting_metadata(slack_alerting):
+    """
+    Test alerting_metadata is propagated correctly for response taking too long
+    """
+    start_time = datetime.now()
+    end_time = start_time + timedelta(seconds=301)
+    kwargs = {
+        "model": "test_model",
+        "messages": "test_messages",
+        "litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
+    }
+    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
+
+        ## RESPONSE TAKING TOO LONG
+        await slack_alerting.response_taking_too_long_callback(
+            kwargs, None, start_time, end_time
+        )
+        mock_send_alert.assert_awaited_once()
+
+        assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]
+
+
 # Test for budget crossed
 @pytest.mark.asyncio
 async def test_budget_alerts_crossed(slack_alerting):
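Usage note (not part of the patch): the change reads `alerting_metadata` from `litellm_params["metadata"]`, so a caller would attach it via the request's `metadata`, mirroring the kwargs shape used in `test_alerting_metadata` above. A minimal sketch follows; the model name and the metadata values ("service", "env") are placeholders, and the exact proxy-side merge of request metadata is assumed, not shown in this diff.

import litellm

# Hypothetical example: the metadata kwarg is forwarded into
# litellm_params["metadata"], where the slow-response and hanging-request
# alerts now look for "alerting_metadata" and append it to the Slack message.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    metadata={"alerting_metadata": {"service": "checkout", "env": "prod"}},
)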