(feat proxy slack alerting) - allow opting in to getting key / internal user alerts (#5990)

* define all slack alert types

* use correct type hints for alert type

* use correct defaults on slack alerting

* add readme for slack alerting

* fix linting error

* update readme

* docs all alert types

* update slack alerting docs

* fix slack alerting docs

* handle new testing dir structure

* fix config for testing

* fix testing folder related imports

* fix /tests import errors

* fix import stream_chunk_testdata

* docs alert types

* fix test test_langfuse_trace_id

* fix type checks for slack alerting

* fix outage alerting test slack
This commit is contained in:
Ishaan Jaff 2024-10-01 10:49:22 -07:00 committed by GitHub
parent 99025024ae
commit 7dd44f8586
21 changed files with 283 additions and 210 deletions

View file

@ -49,6 +49,8 @@ from litellm.exceptions import RejectedRequestError
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
from litellm.integrations.SlackAlerting.types import DEFAULT_ALERT_TYPES
from litellm.integrations.SlackAlerting.utils import _add_langfuse_trace_id_to_alert
from litellm.litellm_core_utils.core_helpers import (
_get_parent_otel_span_from_kwargs,
get_litellm_metadata_from_kwargs,
@ -333,12 +335,11 @@ class ProxyLogging:
self.cache_control_check = _PROXY_CacheControlCheck()
self.alerting: Optional[List] = None
self.alerting_threshold: float = 300 # default to 5 min. threshold
self.alert_types: List[AlertType] = list(get_args(AlertType))
self.alert_types: List[AlertType] = DEFAULT_ALERT_TYPES
self.alert_to_webhook_url: Optional[dict] = None
self.slack_alerting_instance: SlackAlerting = SlackAlerting(
alerting_threshold=self.alerting_threshold,
alerting=self.alerting,
alert_types=self.alert_types,
internal_usage_cache=self.internal_usage_cache.dual_cache,
)
self.premium_user = premium_user
@ -644,9 +645,11 @@ class ProxyLogging:
async def failed_tracking_alert(self, error_message: str):
if self.alerting is None:
return
await self.slack_alerting_instance.failed_tracking_alert(
error_message=error_message
)
if self.slack_alerting_instance:
await self.slack_alerting_instance.failed_tracking_alert(
error_message=error_message
)
async def budget_alerts(
self,
@ -705,10 +708,7 @@ class ProxyLogging:
extra_kwargs = {}
alerting_metadata = {}
if request_data is not None:
_url = await self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
request_data=request_data
)
_url = await _add_langfuse_trace_id_to_alert(request_data=request_data)
if _url is not None:
extra_kwargs["🪢 Langfuse Trace"] = _url
@ -744,7 +744,7 @@ class ProxyLogging:
Currently only logs exceptions to sentry
"""
### ALERTING ###
if "db_exceptions" not in self.alert_types:
if AlertType.db_exceptions not in self.alert_types:
return
if isinstance(original_exception, HTTPException):
if isinstance(original_exception.detail, str):
@ -761,7 +761,7 @@ class ProxyLogging:
self.alerting_handler(
message=f"DB read/write call failed: {error_message}",
level="High",
alert_type="db_exceptions",
alert_type=AlertType.db_exceptions,
request_data={},
)
)
@ -796,7 +796,7 @@ class ProxyLogging:
await self.update_request_status(
litellm_call_id=request_data.get("litellm_call_id", ""), status="fail"
)
if "llm_exceptions" in self.alert_types and not isinstance(
if AlertType.llm_exceptions in self.alert_types and not isinstance(
original_exception, HTTPException
):
"""
@ -813,7 +813,7 @@ class ProxyLogging:
self.alerting_handler(
message=f"LLM API call failed: `{exception_str}`",
level="High",
alert_type="llm_exceptions",
alert_type=AlertType.llm_exceptions,
request_data=request_data,
)
)