[Feature]: allow printing alert logs to the console (#7534)

* update send_to_webhook * test_print_alerting_payload_warning * add alerting_args spec * test_alerting.py
2025-04-26 03:04:13 +00:00 · 2025-01-03 17:48:13 -08:00 · 2025-01-03 17:48:13 -08:00 · 1bb4941036
commit 1bb4941036
parent 6843f3a2bb
5 changed files with 108 additions and 3 deletions
--- a/docs/my-website/docs/proxy/alerting.md
+++ b/docs/my-website/docs/proxy/alerting.md
@ -48,6 +48,19 @@ general_settings:
    alerting: ["slack"]
    alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+ 
    spend_report_frequency: "1d" # [Optional] set as 1d, 2d, 30d .... Specify how often you want a Spend Report to be sent
    # [OPTIONAL ALERTING ARGS]
    alerting_args:
        daily_report_frequency: 43200  # 12 hours in seconds
        report_check_interval: 3600    # 1 hour in seconds
        budget_alert_ttl: 86400        # 24 hours in seconds
        outage_alert_ttl: 60           # 1 minute in seconds
        region_outage_alert_ttl: 60    # 1 minute in seconds
        minor_outage_alert_threshold: 5 
        major_outage_alert_threshold: 10
        max_outage_alert_list_size: 1000
        log_to_console: false
 ```
 Start proxy 
@ -457,3 +470,18 @@ Management Endpoint Alerts - Virtual Key, Team, Internal User
 | `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
 | `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
 | `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
 ## `alerting_args` Specification
 | Parameter | Default | Description |
 |-----------|---------|-------------|
 | `daily_report_frequency` | 43200 (12 hours) | Frequency of receiving deployment latency/failure reports in seconds |
 | `report_check_interval` | 3600 (1 hour) | How often to check if a report should be sent (background process) in seconds |
 | `budget_alert_ttl` | 86400 (24 hours) | Cache TTL in seconds for budget alerts; prevents repeated alerts once a budget has been crossed |
 | `outage_alert_ttl` | 60 (1 minute) | Time window for collecting model outage errors in seconds |
 | `region_outage_alert_ttl` | 60 (1 minute) | Time window for collecting region-based outage errors in seconds |
 | `minor_outage_alert_threshold` | 5 | Number of errors that trigger a minor outage alert (400 errors not counted) |
 | `major_outage_alert_threshold` | 10 | Number of errors that trigger a major outage alert (400 errors not counted) |
 | `max_outage_alert_list_size` | 1000 | Maximum number of errors to store in cache per model/region |
 | `log_to_console` | false | If true, prints the alerting payload to the console as a `.warning` log. |
--- a/litellm/integrations/SlackAlerting/batching_handler.py
+++ b/litellm/integrations/SlackAlerting/batching_handler.py
@ -41,11 +41,27 @@ def squash_payloads(queue):
    return squashed
 def _print_alerting_payload_warning(
    payload: dict, slackAlertingInstance: SlackAlertingType
 ):
    """
    Emit the alert payload through ``verbose_proxy_logger`` at WARNING level.

    Nothing is logged unless
    ``slackAlertingInstance.alerting_args.log_to_console`` is exactly ``True``.

    Args:
        payload: The alert payload to log; passed to the logger unchanged.
        slackAlertingInstance: Object whose ``alerting_args.log_to_console``
            flag controls whether the payload is logged.

    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
    """
    console_logging_enabled = slackAlertingInstance.alerting_args.log_to_console
    # Identity check on purpose: only a literal True turns logging on.
    if console_logging_enabled is not True:
        return
    verbose_proxy_logger.warning(payload)
 async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
    """
    Send a single slack alert to the webhook
    """
    import json
    payload = item.get("payload", {})
    try:
        payload = item["payload"]
        if count > 1:
            payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
@ -60,3 +76,7 @@ async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count)
            )
    except Exception as e:
        verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
    finally:
        _print_alerting_payload_warning(
            payload, slackAlertingInstance=slackAlertingInstance
        )
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@ -7,3 +7,5 @@ model_list:
 general_settings: 
  master_key: sk-1234 
  alerting: ["slack"]
  alerting_threshold: 0.0000001
--- a/litellm/types/integrations/slack_alerting.py
+++ b/litellm/types/integrations/slack_alerting.py
@ -78,6 +78,10 @@ class SlackAlertingArgs(LiteLLMPydanticObjectBase):
        default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
        description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
    )  # prevent memory leak
    log_to_console: bool = Field(
        default=False,
        description="If true, the alerting payload will be printed to the console.",
    )
 class DeploymentMetrics(LiteLLMPydanticObjectBase):
--- a/tests/logging_callback_tests/test_alerting.py
+++ b/tests/logging_callback_tests/test_alerting.py
@ -874,3 +874,54 @@ async def test_langfuse_trace_id():
    assert returned_trace_id == int(
        litellm_logging_obj._get_trace_id(service_name="langfuse")
    )
@pytest.mark.asyncio
async def test_print_alerting_payload_warning():
    """
    Test if alerts are printed to verbose logger when log_to_console=True

    A temporary stream handler is attached to ``verbose_proxy_logger`` to
    capture its output. The handler and the logger's previous level are
    restored in a ``finally`` block so a failing assertion cannot leak the
    handler (or the forced WARNING level) into other tests that share the
    same logger.
    """
    litellm.set_verbose = True
    from litellm._logging import verbose_proxy_logger
    from litellm.integrations.SlackAlerting.batching_handler import send_to_webhook
    import logging

    # Create a string buffer to capture log output
    log_stream = io.StringIO()
    handler = logging.StreamHandler(log_stream)
    previous_level = verbose_proxy_logger.level
    verbose_proxy_logger.addHandler(handler)
    verbose_proxy_logger.setLevel(logging.WARNING)

    try:
        # Create SlackAlerting instance with log_to_console=True
        slack_alerting = SlackAlerting(
            alerting_threshold=0.0000001,
            alerting=["slack"],
            alert_types=[AlertType.llm_exceptions],
            internal_usage_cache=DualCache(),
        )
        slack_alerting.alerting_args.log_to_console = True

        test_payload = {"text": "Test alert message"}

        # Send an alert; the HTTP call is replaced so nothing leaves the process
        with patch.object(slack_alerting.async_http_handler, "post", new=AsyncMock()):
            await send_to_webhook(
                slackAlertingInstance=slack_alerting,
                item={
                    "url": "https://example.com",
                    "headers": {"Content-Type": "application/json"},
                    "payload": test_payload,
                },
                count=1,
            )

        # Check if the payload was logged
        log_output = log_stream.getvalue()
        print(log_output)
        assert test_payload["text"] in log_output
    finally:
        # Clean up even when the assertion above fails
        verbose_proxy_logger.removeHandler(handler)
        verbose_proxy_logger.setLevel(previous_level)
        log_stream.close()