[Feature]: - allow print alert log to console (#7534)

* update send_to_webhook

* test_print_alerting_payload_warning

* add alerting_args spec

* test_alerting.py
This commit is contained in:
Ishaan Jaff 2025-01-03 17:48:13 -08:00 committed by GitHub
parent 6843f3a2bb
commit 1bb4941036
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 108 additions and 3 deletions

View file

@ -48,6 +48,19 @@ general_settings:
alerting: ["slack"]
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
spend_report_frequency: "1d" # [Optional] set as 1d, 2d, 30d .... Specify how often you want a Spend Report to be sent
# [OPTIONAL ALERTING ARGS]
alerting_args:
daily_report_frequency: 43200 # 12 hours in seconds
report_check_interval: 3600 # 1 hour in seconds
budget_alert_ttl: 86400 # 24 hours in seconds
outage_alert_ttl: 60 # 1 minute in seconds
region_outage_alert_ttl: 60 # 1 minute in seconds
minor_outage_alert_threshold: 5
major_outage_alert_threshold: 10
max_outage_alert_list_size: 1000
log_to_console: false
```
Start proxy
@ -456,4 +469,19 @@ Management Endpoint Alerts - Virtual Key, Team, Internal User
| `team_deleted` | Alerts when a team is deleted | ❌ |
| `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
| `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
## `alerting_args` Specification
| Parameter | Default | Description |
|-----------|---------|-------------|
| `daily_report_frequency` | 43200 (12 hours) | Frequency of receiving deployment latency/failure reports in seconds |
| `report_check_interval` | 3600 (1 hour) | How often to check if a report should be sent (background process) in seconds |
| `budget_alert_ttl` | 86400 (24 hours) | Cache TTL for budget alerts to prevent spam when budget is crossed |
| `outage_alert_ttl` | 60 (1 minute) | Time window for collecting model outage errors in seconds |
| `region_outage_alert_ttl` | 60 (1 minute) | Time window for collecting region-based outage errors in seconds |
| `minor_outage_alert_threshold` | 5 | Number of errors that trigger a minor outage alert (400 errors not counted) |
| `major_outage_alert_threshold` | 10 | Number of errors that trigger a major outage alert (400 errors not counted) |
| `max_outage_alert_list_size` | 1000 | Maximum number of errors to store in cache per model/region |
| `log_to_console` | false | If true, prints alerting payload to console as a `.warning` log. |

View file

@ -41,11 +41,27 @@ def squash_payloads(queue):
return squashed
def _print_alerting_payload_warning(
    payload: dict, slackAlertingInstance: SlackAlertingType
):
    """
    Mirror *payload* to the console via the proxy logger's ``warning`` level,
    but only when console logging has been opted into on the alerting
    instance (``alerting_args.log_to_console``).

    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
    """
    log_to_console = slackAlertingInstance.alerting_args.log_to_console
    if log_to_console is True:
        verbose_proxy_logger.warning(payload)
async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
"""
Send a single slack alert to the webhook
"""
import json
payload = item.get("payload", {})
try:
payload = item["payload"]
if count > 1:
payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
@ -60,3 +76,7 @@ async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count)
)
except Exception as e:
verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
finally:
_print_alerting_payload_warning(
payload, slackAlertingInstance=slackAlertingInstance
)

View file

@ -6,4 +6,6 @@ model_list:
api_key: "ishaan"
general_settings:
master_key: sk-1234
alerting: ["slack"]
alerting_threshold: 0.0000001

View file

@ -78,6 +78,10 @@ class SlackAlertingArgs(LiteLLMPydanticObjectBase):
default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
) # prevent memory leak
log_to_console: bool = Field(
default=False,
description="If true, the alerting payload will be printed to the console.",
)
class DeploymentMetrics(LiteLLMPydanticObjectBase):

View file

@ -874,3 +874,54 @@ async def test_langfuse_trace_id():
assert returned_trace_id == int(
litellm_logging_obj._get_trace_id(service_name="langfuse")
)
@pytest.mark.asyncio
async def test_print_alerting_payload_warning():
    """
    Test that the alert payload is mirrored to the verbose proxy logger
    (as a ``.warning`` record) when ``alerting_args.log_to_console=True``.
    """
    litellm.set_verbose = True
    from litellm._logging import verbose_proxy_logger
    from litellm.integrations.SlackAlerting.batching_handler import send_to_webhook
    import logging

    # Capture the logger's output in an in-memory buffer.
    log_stream = io.StringIO()
    handler = logging.StreamHandler(log_stream)
    verbose_proxy_logger.addHandler(handler)
    verbose_proxy_logger.setLevel(logging.WARNING)

    try:
        # SlackAlerting instance with console logging opted in.
        slack_alerting = SlackAlerting(
            alerting_threshold=0.0000001,
            alerting=["slack"],
            alert_types=[AlertType.llm_exceptions],
            internal_usage_cache=DualCache(),
        )
        slack_alerting.alerting_args.log_to_console = True

        test_payload = {"text": "Test alert message"}

        # Send an alert; the outbound HTTP post is mocked so no network
        # call is made and only the logging side effect is exercised.
        with patch.object(
            slack_alerting.async_http_handler, "post", new=AsyncMock()
        ) as mock_post:
            await send_to_webhook(
                slackAlertingInstance=slack_alerting,
                item={
                    "url": "https://example.com",
                    "headers": {"Content-Type": "application/json"},
                    "payload": test_payload,
                },
                count=1,
            )

        # The payload must appear in the captured warning output.
        log_output = log_stream.getvalue()
        print(log_output)
        assert "Test alert message" in log_output
    finally:
        # Always detach the handler and close the buffer — even when the
        # assertion fails — so other tests' logging is unaffected.
        verbose_proxy_logger.removeHandler(handler)
        log_stream.close()