(Feat) - Add PagerDuty Alerting Integration (#7478)

* define basic types

* fix verbose_logger.exception statement

* fix basic alerting

* test pager duty alerting

* test_pagerduty_alerting_high_failure_rate

* PagerDutyAlerting

* async_log_failure_event

* use pre_call_hook

* add _request_is_completed helper util

* update AlertingConfig

* rename PagerDutyInternalEvent

* _send_alert_if_thresholds_crossed

* use pagerduty as _custom_logger_compatible_callbacks_literal

* fix slack alerting imports

* fix imports in slack alerting

* PagerDutyAlerting

* fix _load_alerting_settings

* test_pagerduty_hanging_request_alerting

* working pager duty alerting

* fix linting

* doc pager duty alerting

* update hanging_response_handler

* fix import location

* update failure_threshold

* update async_pre_call_hook

* docs pagerduty

* test - callback_class_str_to_classType

* fix linting errors

* fix linting + testing error

* PagerDutyAlerting

* test_pagerduty_hanging_request_alerting

* fix unused imports

* docs pager duty

* @pytest.mark.flaky(retries=6, delay=2)

* test_model_info_bedrock_converse_enforcement
This commit is contained in:
Ishaan Jaff 2025-01-01 07:12:51 -08:00 committed by GitHub
parent 9af6ba0a02
commit a39cac313c
15 changed files with 691 additions and 28 deletions

View file

@ -1939,15 +1939,7 @@ class ProxyConfig:
use_azure_key_vault = general_settings.get("use_azure_key_vault", False)
load_from_azure_key_vault(use_azure_key_vault=use_azure_key_vault)
### ALERTING ###
proxy_logging_obj.update_values(
alerting=general_settings.get("alerting", None),
alerting_threshold=general_settings.get("alerting_threshold", 600),
alert_types=general_settings.get("alert_types", None),
alert_to_webhook_url=general_settings.get("alert_to_webhook_url", None),
alerting_args=general_settings.get("alerting_args", None),
redis_cache=redis_usage_cache,
)
self._load_alerting_settings(general_settings=general_settings)
### CONNECT TO DATABASE ###
database_url = general_settings.get("database_url", None)
if database_url and database_url.startswith("os.environ/"):
@ -2135,6 +2127,46 @@ class ProxyConfig:
)
return router, router.get_model_list(), general_settings
def _load_alerting_settings(self, general_settings: dict):
    """
    Initialize alerting settings from the proxy's `general_settings`.

    Reads the `alerting` entry and sets up each listed integration:

    - "slack": configured through `proxy_logging_obj.update_values`
      (the older, v0 code path).
    - any other name found in
      `litellm._known_custom_logger_compatible_callbacks`: instantiated via
      `_init_custom_logger_compatible_class` and appended to
      `litellm.callbacks` (the newer, v1 code path).

    Names that match neither case are skipped without raising.

    Args:
        general_settings: The `general_settings` dict from the proxy config.
            Keys read here: `alerting`, `alerting_threshold`, `alert_types`,
            `alert_to_webhook_url`, `alerting_args`.

    Returns:
        None. Does nothing when `alerting` is missing or None.
    """
    from litellm.litellm_core_utils.litellm_logging import (
        _init_custom_logger_compatible_class,
    )

    _alerting_callbacks = general_settings.get("alerting", None)
    # Log only the alerting list the message label refers to, rather than the
    # entire general_settings dict.
    verbose_proxy_logger.debug(f"_alerting_callbacks: {_alerting_callbacks}")
    if _alerting_callbacks is None:
        return

    # Loop-invariant: the same alerting_args are handed to every integration.
    _alerting_args = general_settings.get("alerting_args", None)

    for _alert in _alerting_callbacks:
        if _alert == "slack":
            # [OLD] v0 implementation
            proxy_logging_obj.update_values(
                alerting=_alerting_callbacks,
                alerting_threshold=general_settings.get("alerting_threshold", 600),
                alert_types=general_settings.get("alert_types", None),
                alert_to_webhook_url=general_settings.get(
                    "alert_to_webhook_url", None
                ),
                alerting_args=_alerting_args,
                redis_cache=redis_usage_cache,
            )
        elif _alert in litellm._known_custom_logger_compatible_callbacks:
            # [NEW] v1 implementation - init as a custom logger
            _logger = _init_custom_logger_compatible_class(
                logging_integration=_alert,
                internal_usage_cache=None,
                llm_router=None,
                custom_logger_init_args={"alerting_args": _alerting_args},
            )
            # The initializer may return None; only register a real logger.
            if _logger is not None:
                litellm.callbacks.append(_logger)
def get_model_info_with_id(self, model, db_model=False) -> RouterModelInfo:
"""
Common logic across add + delete router models