diff --git a/docs/my-website/docs/proxy/alerting.md b/docs/my-website/docs/proxy/alerting.md
index a5519157c4..b6296b60a7 100644
--- a/docs/my-website/docs/proxy/alerting.md
+++ b/docs/my-website/docs/proxy/alerting.md
@@ -48,6 +48,19 @@ general_settings:
   alerting: ["slack"]
   alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
   spend_report_frequency: "1d" # [Optional] set as 1d, 2d, 30d .... Specifiy how often you want a Spend Report to be sent
+
+  # [OPTIONAL ALERTING ARGS]
+  alerting_args:
+    daily_report_frequency: 43200 # 12 hours in seconds
+    report_check_interval: 3600 # 1 hour in seconds
+    budget_alert_ttl: 86400 # 24 hours in seconds
+    outage_alert_ttl: 60 # 1 minute in seconds
+    region_outage_alert_ttl: 60 # 1 minute in seconds
+    minor_outage_alert_threshold: 5
+    major_outage_alert_threshold: 10
+    max_outage_alert_list_size: 1000
+    log_to_console: false
+
 ```
 
 Start proxy
@@ -456,4 +469,19 @@ Management Endpoint Alerts - Virtual Key, Team, Internal User
 | `team_deleted` | Alerts when a team is deleted | ❌ |
 | `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
 | `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
-| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
\ No newline at end of file
+| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
+
+
+## `alerting_args` Specification
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `daily_report_frequency` | 43200 (12 hours) | Frequency of receiving deployment latency/failure reports in seconds |
+| `report_check_interval` | 3600 (1 hour) | How often to check if a report should be sent (background process) in seconds |
+| `budget_alert_ttl` | 86400 (24 hours) | Cache TTL for budget alerts to prevent spam when budget is crossed |
+| `outage_alert_ttl` | 60 (1 minute) | Time window for collecting model outage errors in seconds |
+| `region_outage_alert_ttl` | 60 (1 minute) | Time window for collecting region-based outage errors in seconds |
+| `minor_outage_alert_threshold` | 5 | Number of errors that trigger a minor outage alert (400 errors not counted) |
+| `major_outage_alert_threshold` | 10 | Number of errors that trigger a major outage alert (400 errors not counted) |
+| `max_outage_alert_list_size` | 1000 | Maximum number of errors to store in cache per model/region |
+| `log_to_console` | false | If true, prints alerting payload to console as a `.warning` log. |
\ No newline at end of file
diff --git a/litellm/integrations/SlackAlerting/batching_handler.py b/litellm/integrations/SlackAlerting/batching_handler.py
index f52147a001..e35cf61d63 100644
--- a/litellm/integrations/SlackAlerting/batching_handler.py
+++ b/litellm/integrations/SlackAlerting/batching_handler.py
@@ -41,11 +41,27 @@ def squash_payloads(queue):
     return squashed
 
 
+def _print_alerting_payload_warning(
+    payload: dict, slackAlertingInstance: SlackAlertingType
+):
+    """
+    Print the payload to the console when
+    slackAlertingInstance.alerting_args.log_to_console is True
+
+    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
+    """
+    if slackAlertingInstance.alerting_args.log_to_console is True:
+        verbose_proxy_logger.warning(payload)
+
+
 async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
+    """
+    Send a single slack alert to the webhook
+    """
     import json
 
+    payload = item.get("payload", {})
     try:
-        payload = item["payload"]
         if count > 1:
             payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
 
@@ -60,3 +76,7 @@ async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count)
         )
     except Exception as e:
         verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
+    finally:
+        _print_alerting_payload_warning(
+            payload, slackAlertingInstance=slackAlertingInstance
+        )
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 787347e84d..f7eea05441 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -6,4 +6,6 @@ model_list:
       api_key: "ishaan"
 
 general_settings:
-  master_key: sk-1234
\ No newline at end of file
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0000001
\ No newline at end of file
diff --git a/litellm/types/integrations/slack_alerting.py b/litellm/types/integrations/slack_alerting.py
index f7026c407d..9019b098d9 100644
--- a/litellm/types/integrations/slack_alerting.py
+++ b/litellm/types/integrations/slack_alerting.py
@@ -78,6 +78,10 @@ class SlackAlertingArgs(LiteLLMPydanticObjectBase):
         default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
         description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
     ) # prevent memory leak
+    log_to_console: bool = Field(
+        default=False,
+        description="If true, the alerting payload will be printed to the console.",
+    )
 
 
 class DeploymentMetrics(LiteLLMPydanticObjectBase):
diff --git a/tests/local_testing/test_alerting.py b/tests/logging_callback_tests/test_alerting.py
similarity index 94%
rename from tests/local_testing/test_alerting.py
rename to tests/logging_callback_tests/test_alerting.py
index cc668801f0..c095a1619e 100644
--- a/tests/local_testing/test_alerting.py
+++ b/tests/logging_callback_tests/test_alerting.py
@@ -874,3 +874,54 @@ async def test_langfuse_trace_id():
     assert returned_trace_id == int(
         litellm_logging_obj._get_trace_id(service_name="langfuse")
     )
+
+
+@pytest.mark.asyncio
+async def test_print_alerting_payload_warning():
+    """
+    Test if alerts are printed to verbose logger when log_to_console=True
+    """
+    litellm.set_verbose = True
+    from litellm._logging import verbose_proxy_logger
+    from litellm.integrations.SlackAlerting.batching_handler import send_to_webhook
+    import logging
+
+    # Create a string buffer to capture log output
+    log_stream = io.StringIO()
+    handler = logging.StreamHandler(log_stream)
+    verbose_proxy_logger.addHandler(handler)
+    verbose_proxy_logger.setLevel(logging.WARNING)
+
+    # Create SlackAlerting instance with log_to_console=True
+    slack_alerting = SlackAlerting(
+        alerting_threshold=0.0000001,
+        alerting=["slack"],
+        alert_types=[AlertType.llm_exceptions],
+        internal_usage_cache=DualCache(),
+    )
+    slack_alerting.alerting_args.log_to_console = True
+
+    test_payload = {"text": "Test alert message"}
+
+    # Send an alert
+    with patch.object(
+        slack_alerting.async_http_handler, "post", new=AsyncMock()
+    ) as mock_post:
+        await send_to_webhook(
+            slackAlertingInstance=slack_alerting,
+            item={
+                "url": "https://example.com",
+                "headers": {"Content-Type": "application/json"},
+                "payload": {"text": "Test alert message"},
+            },
+            count=1,
+        )
+
+    # Check if the payload was logged
+    log_output = log_stream.getvalue()
+    print(log_output)
+    assert "Test alert message" in log_output
+
+    # Clean up
+    verbose_proxy_logger.removeHandler(handler)
+    log_stream.close()
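
Reviewer note (not part of the diff): a minimal sketch of flipping the new `log_to_console` flag programmatically, mirroring the test added above. The `SlackAlerting` and `DualCache` import paths are assumptions, and the omitted constructor arguments are assumed to have workable defaults; only `alerting_args.log_to_console` comes from this change.

```python
# Sketch only: import paths are assumed, not verified against this branch.
from litellm.caching import DualCache
from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting

# Constructor kwargs mirror the test above; everything else stays at its default.
slack_alerting = SlackAlerting(
    alerting=["slack"],
    internal_usage_cache=DualCache(),
)

# log_to_console defaults to False. With it enabled, send_to_webhook() calls
# _print_alerting_payload_warning() in its finally block, so every alert payload
# is also emitted through verbose_proxy_logger.warning().
slack_alerting.alerting_args.log_to_console = True
```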