(feat proxy slack alerting) - allow opting in to getting key / internal user alerts (#5990)

* define all slack alert types
* use correct type hints for alert type
* use correct defaults on slack alerting
* add readme for slack alerting
* fix linting error
* update readme
* docs all alert types
* update slack alerting docs
* fix slack alerting docs
* handle new testing dir structure
* fix config for testing
* fix testing folder related imports
* fix /tests import errors
* fix import stream_chunk_testdata
* docs alert types
* fix test test_langfuse_trace_id
* fix type checks for slack alerting
* fix outage alerting test slack
2025-04-26 11:14:04 +00:00 · 2024-10-01 10:49:22 -07:00 · 2024-10-01 10:49:22 -07:00 · 7dd44f8586
commit 7dd44f8586
parent 99025024ae
21 changed files with 283 additions and 210 deletions
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -49,6 +49,8 @@ from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
+from litellm.integrations.SlackAlerting.types import DEFAULT_ALERT_TYPES
+from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
 from litellm.litellm_core_utils.core_helpers import (
    _get_parent_otel_span_from_kwargs,
    get_litellm_metadata_from_kwargs,
@@ -333,12 +335,11 @@ class ProxyLogging:
        self.cache_control_check = _PROXY_CacheControlCheck()
        self.alerting: Optional[List] = None
        self.alerting_threshold: float = 300  # default to 5 min. threshold
-        self.alert_types: List[AlertType] = list(get_args(AlertType))
+        self.alert_types: List[AlertType] = DEFAULT_ALERT_TYPES
        self.alert_to_webhook_url: Optional[dict] = None
        self.slack_alerting_instance: SlackAlerting = SlackAlerting(
            alerting_threshold=self.alerting_threshold,
            alerting=self.alerting,
-            alert_types=self.alert_types,
            internal_usage_cache=self.internal_usage_cache.dual_cache,
        )
        self.premium_user = premium_user
@@ -644,9 +645,11 @@ class ProxyLogging:
    async def failed_tracking_alert(self, error_message: str):
        if self.alerting is None:
            return
-        await self.slack_alerting_instance.failed_tracking_alert(
-            error_message=error_message
-        )
+
+        if self.slack_alerting_instance:
+            await self.slack_alerting_instance.failed_tracking_alert(
+                error_message=error_message
+            )

    async def budget_alerts(
        self,
@@ -705,10 +708,7 @@ class ProxyLogging:
        extra_kwargs = {}
        alerting_metadata = {}
        if request_data is not None:
-
-            _url = await self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
-                request_data=request_data
-            )
+            _url = await _add_langfuse_trace_id_to_alert(request_data=request_data)

            if _url is not None:
                extra_kwargs["🪢 Langfuse Trace"] = _url
@@ -744,7 +744,7 @@ class ProxyLogging:
        Currently only logs exceptions to sentry
        """
        ### ALERTING ###
-        if "db_exceptions" not in self.alert_types:
+        if AlertType.db_exceptions not in self.alert_types:
            return
        if isinstance(original_exception, HTTPException):
            if isinstance(original_exception.detail, str):
@@ -761,7 +761,7 @@ class ProxyLogging:
            self.alerting_handler(
                message=f"DB read/write call failed: {error_message}",
                level="High",
-                alert_type="db_exceptions",
+                alert_type=AlertType.db_exceptions,
                request_data={},
            )
        )
@@ -796,7 +796,7 @@ class ProxyLogging:
        await self.update_request_status(
            litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
        )
-        if "llm_exceptions" in self.alert_types and not isinstance(
+        if AlertType.llm_exceptions in self.alert_types and not isinstance(
            original_exception, HTTPException
        ):
            """
@@ -813,7 +813,7 @@ class ProxyLogging:
                self.alerting_handler(
                    message=f"LLM API call failed: `{exception_str}`",
                    level="High",
-                    alert_type="llm_exceptions",
+                    alert_type=AlertType.llm_exceptions,
                    request_data=request_data,
                )
            )