mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
[Feature]: - allow print alert log to console (#7534)
* update send_to_webhook * test_print_alerting_payload_warning * add alerting_args spec * test_alerting.py
This commit is contained in:
parent
6843f3a2bb
commit
1bb4941036
5 changed files with 108 additions and 3 deletions
|
@ -48,6 +48,19 @@ general_settings:
|
||||||
alerting: ["slack"]
|
alerting: ["slack"]
|
||||||
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
|
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
|
||||||
spend_report_frequency: "1d" # [Optional] set as 1d, 2d, 30d .... Specify how often you want a Spend Report to be sent
|
spend_report_frequency: "1d" # [Optional] set as 1d, 2d, 30d .... Specify how often you want a Spend Report to be sent
|
||||||
|
|
||||||
|
# [OPTIONAL ALERTING ARGS]
|
||||||
|
alerting_args:
|
||||||
|
daily_report_frequency: 43200 # 12 hours in seconds
|
||||||
|
report_check_interval: 3600 # 1 hour in seconds
|
||||||
|
budget_alert_ttl: 86400 # 24 hours in seconds
|
||||||
|
outage_alert_ttl: 60 # 1 minute in seconds
|
||||||
|
region_outage_alert_ttl: 60 # 1 minute in seconds
|
||||||
|
minor_outage_alert_threshold: 5
|
||||||
|
major_outage_alert_threshold: 10
|
||||||
|
max_outage_alert_list_size: 1000
|
||||||
|
log_to_console: false
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Start proxy
|
Start proxy
|
||||||
|
@ -457,3 +470,18 @@ Management Endpoint Alerts - Virtual Key, Team, Internal User
|
||||||
| `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
|
| `new_internal_user_created` | Notifications for new internal user accounts | ❌ |
|
||||||
| `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
|
| `internal_user_updated` | Alerts when an internal user's details are changed | ❌ |
|
||||||
| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
|
| `internal_user_deleted` | Notifications when an internal user account is removed | ❌ |
|
||||||
|
|
||||||
|
|
||||||
|
## `alerting_args` Specification
|
||||||
|
|
||||||
|
| Parameter | Default | Description |
|
||||||
|
|-----------|---------|-------------|
|
||||||
|
| `daily_report_frequency` | 43200 (12 hours) | Frequency of receiving deployment latency/failure reports in seconds |
|
||||||
|
| `report_check_interval` | 3600 (1 hour) | How often to check if a report should be sent (background process) in seconds |
|
||||||
|
| `budget_alert_ttl` | 86400 (24 hours) | Cache TTL for budget alerts, in seconds, to prevent spam when budget is crossed |
|
||||||
|
| `outage_alert_ttl` | 60 (1 minute) | Time window for collecting model outage errors in seconds |
|
||||||
|
| `region_outage_alert_ttl` | 60 (1 minute) | Time window for collecting region-based outage errors in seconds |
|
||||||
|
| `minor_outage_alert_threshold` | 5 | Number of errors that trigger a minor outage alert (400 errors not counted) |
|
||||||
|
| `major_outage_alert_threshold` | 10 | Number of errors that trigger a major outage alert (400 errors not counted) |
|
||||||
|
| `max_outage_alert_list_size` | 1000 | Maximum number of errors to store in cache per model/region |
|
||||||
|
| `log_to_console` | false | If true, prints alerting payload to console as a `.warning` log. |
|
|
@ -41,11 +41,27 @@ def squash_payloads(queue):
|
||||||
return squashed
|
return squashed
|
||||||
|
|
||||||
|
|
||||||
|
def _print_alerting_payload_warning(
    payload: dict, slackAlertingInstance: SlackAlertingType
):
    """
    Emit the alert payload through `verbose_proxy_logger.warning`.

    Nothing is logged unless
    `slackAlertingInstance.alerting_args.log_to_console` is exactly `True`.

    Args:
        payload: The alert payload to log.
        slackAlertingInstance: Alerting instance whose `alerting_args`
            carries the `log_to_console` flag.

    Relevant issue: https://github.com/BerriAI/litellm/issues/7372
    """
    console_logging_enabled = slackAlertingInstance.alerting_args.log_to_console
    # Identity check on purpose: only a literal True enables console output.
    if console_logging_enabled is not True:
        return
    verbose_proxy_logger.warning(payload)
|
||||||
|
|
||||||
|
|
||||||
async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
|
async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count):
|
||||||
|
"""
|
||||||
|
Send a single slack alert to the webhook
|
||||||
|
"""
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
payload = item.get("payload", {})
|
||||||
try:
|
try:
|
||||||
payload = item["payload"]
|
|
||||||
if count > 1:
|
if count > 1:
|
||||||
payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
|
payload["text"] = f"[Num Alerts: {count}]\n\n{payload['text']}"
|
||||||
|
|
||||||
|
@ -60,3 +76,7 @@ async def send_to_webhook(slackAlertingInstance: SlackAlertingType, item, count)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
|
verbose_proxy_logger.debug(f"Error sending slack alert: {str(e)}")
|
||||||
|
finally:
|
||||||
|
_print_alerting_payload_warning(
|
||||||
|
payload, slackAlertingInstance=slackAlertingInstance
|
||||||
|
)
|
||||||
|
|
|
@ -7,3 +7,5 @@ model_list:
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
|
alerting: ["slack"]
|
||||||
|
alerting_threshold: 0.0000001
|
|
@ -78,6 +78,10 @@ class SlackAlertingArgs(LiteLLMPydanticObjectBase):
|
||||||
default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
|
default=SlackAlertingArgsEnum.max_outage_alert_list_size.value,
|
||||||
description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
|
description="Maximum number of errors to store in cache. For a given model/region. Prevents memory leaks.",
|
||||||
) # prevent memory leak
|
) # prevent memory leak
|
||||||
|
log_to_console: bool = Field(
|
||||||
|
default=False,
|
||||||
|
description="If true, the alerting payload will be printed to the console.",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DeploymentMetrics(LiteLLMPydanticObjectBase):
|
class DeploymentMetrics(LiteLLMPydanticObjectBase):
|
||||||
|
|
|
@ -874,3 +874,54 @@ async def test_langfuse_trace_id():
|
||||||
assert returned_trace_id == int(
|
assert returned_trace_id == int(
|
||||||
litellm_logging_obj._get_trace_id(service_name="langfuse")
|
litellm_logging_obj._get_trace_id(service_name="langfuse")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_print_alerting_payload_warning():
    """
    Test if alerts are printed to verbose logger when log_to_console=True

    A temporary stream handler is attached to `verbose_proxy_logger` to
    capture output. The handler and the logger level are always restored in a
    `finally` block so a failing assertion does not leak state into other
    tests that share the logger.
    """
    litellm.set_verbose = True
    from litellm._logging import verbose_proxy_logger
    from litellm.integrations.SlackAlerting.batching_handler import send_to_webhook
    import logging

    # Create a string buffer to capture log output
    log_stream = io.StringIO()
    handler = logging.StreamHandler(log_stream)
    previous_level = verbose_proxy_logger.level
    verbose_proxy_logger.addHandler(handler)
    verbose_proxy_logger.setLevel(logging.WARNING)

    try:
        # Create SlackAlerting instance with log_to_console=True
        slack_alerting = SlackAlerting(
            alerting_threshold=0.0000001,
            alerting=["slack"],
            alert_types=[AlertType.llm_exceptions],
            internal_usage_cache=DualCache(),
        )
        slack_alerting.alerting_args.log_to_console = True

        test_payload = {"text": "Test alert message"}

        # Send an alert; the HTTP post is mocked so no network call is made
        with patch.object(
            slack_alerting.async_http_handler, "post", new=AsyncMock()
        ):
            await send_to_webhook(
                slackAlertingInstance=slack_alerting,
                item={
                    "url": "https://example.com",
                    "headers": {"Content-Type": "application/json"},
                    "payload": test_payload,
                },
                count=1,
            )

        # Check if the payload was logged
        log_output = log_stream.getvalue()
        print(log_output)
        assert test_payload["text"] in log_output
    finally:
        # Clean up, even when the assertion above fails
        verbose_proxy_logger.removeHandler(handler)
        verbose_proxy_logger.setLevel(previous_level)
        log_stream.close()
|
Loading…
Add table
Add a link
Reference in a new issue