forked from phoenix/litellm-mirror
Merge pull request #4205 from BerriAI/litellm_alerting_metadata
fix(slack_alerting.py): allow new 'alerting_metadata' arg
This commit is contained in:
commit
3ef5d40b73
6 changed files with 130 additions and 37 deletions
|
@ -32,41 +32,33 @@ Get a slack webhook url from https://api.slack.com/messaging/webhooks
|
|||
|
||||
You can also use Discord Webhooks, see [here](#using-discord-webhooks)
|
||||
|
||||
### Step 2: Update config.yaml
|
||||
|
||||
- Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
|
||||
- Just for testing purposes, let's save a bad key to our proxy.
|
||||
Set `SLACK_WEBHOOK_URL` in your proxy env to enable Slack alerts.
|
||||
|
||||
```bash
|
||||
export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
|
||||
```
|
||||
|
||||
### Step 2: Setup Proxy
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
model_name: "azure-model"
|
||||
litellm_params:
|
||||
model: "azure/gpt-35-turbo"
|
||||
api_key: "my-bad-key" # 👈 bad key
|
||||
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
alerting_threshold: 300 # sends alerts if requests hang for 5min+ and responses take 5min+
|
||||
|
||||
environment_variables:
|
||||
SLACK_WEBHOOK_URL: "https://hooks.slack.com/services/<>/<>/<>"
|
||||
SLACK_DAILY_REPORT_FREQUENCY: "86400" # 24 hours; Optional: defaults to 12 hours
|
||||
```
|
||||
|
||||
|
||||
### Step 3: Start proxy
|
||||
|
||||
Start proxy
|
||||
```bash
|
||||
$ litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
## Testing Alerting is Setup Correctly
|
||||
|
||||
Make a GET request to `/health/services`. You should see a test Slack alert in the Slack channel tied to your webhook.
|
||||
### Step 3: Test it!
|
||||
|
||||
```shell
|
||||
curl -X GET 'http://localhost:4000/health/services?service=slack' \
|
||||
-H 'Authorization: Bearer sk-1234'
|
||||
|
||||
```bash
|
||||
curl -X GET 'http://0.0.0.0:4000/health/services?service=slack' \
|
||||
-H 'Authorization: Bearer sk-1234'
|
||||
```
|
||||
|
||||
## Advanced - Redacting Messages from Alerts
|
||||
|
@ -84,7 +76,34 @@ litellm_settings:
|
|||
```
|
||||
|
||||
|
||||
## Advanced - Add Metadata to alerts
|
||||
|
||||
Add alerting metadata to proxy calls for debugging.
|
||||
|
||||
```python
|
||||
import openai
|
||||
client = openai.OpenAI(
|
||||
api_key="anything",
|
||||
base_url="http://0.0.0.0:4000"
|
||||
)
|
||||
|
||||
# request sent to model set on litellm proxy, `litellm --model`
|
||||
response = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages = [],
|
||||
extra_body={
|
||||
"metadata": {
|
||||
"alerting_metadata": {
|
||||
"hello": "world"
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
**Expected Response**
|
||||
|
||||
<Image img={require('../../img/alerting_metadata.png')}/>
|
||||
|
||||
## Advanced - Opting into specific alert types
|
||||
|
||||
|
|
BIN
docs/my-website/img/alerting_metadata.png
Normal file
BIN
docs/my-website/img/alerting_metadata.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 207 KiB |
|
@ -330,6 +330,7 @@ class SlackAlerting(CustomLogger):
|
|||
messages = "Message not logged. litellm.redact_messages_in_exceptions=True"
|
||||
request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
|
||||
slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
|
||||
alerting_metadata: dict = {}
|
||||
if time_difference_float > self.alerting_threshold:
|
||||
# add deployment latencies to alert
|
||||
if (
|
||||
|
@ -337,7 +338,7 @@ class SlackAlerting(CustomLogger):
|
|||
and "litellm_params" in kwargs
|
||||
and "metadata" in kwargs["litellm_params"]
|
||||
):
|
||||
_metadata = kwargs["litellm_params"]["metadata"]
|
||||
_metadata: dict = kwargs["litellm_params"]["metadata"]
|
||||
request_info = litellm.utils._add_key_name_and_team_to_alert(
|
||||
request_info=request_info, metadata=_metadata
|
||||
)
|
||||
|
@ -349,10 +350,14 @@ class SlackAlerting(CustomLogger):
|
|||
request_info += (
|
||||
f"\nAvailable Deployment Latencies\n{_deployment_latency_map}"
|
||||
)
|
||||
|
||||
if "alerting_metadata" in _metadata:
|
||||
alerting_metadata = _metadata["alerting_metadata"]
|
||||
await self.send_alert(
|
||||
message=slow_message + request_info,
|
||||
level="Low",
|
||||
alert_type="llm_too_slow",
|
||||
alerting_metadata=alerting_metadata,
|
||||
)
|
||||
|
||||
async def async_update_daily_reports(
|
||||
|
@ -540,7 +545,12 @@ class SlackAlerting(CustomLogger):
|
|||
message += f"\n\nNext Run is at: `{time.time() + self.alerting_args.daily_report_frequency}`s"
|
||||
|
||||
# send alert
|
||||
await self.send_alert(message=message, level="Low", alert_type="daily_reports")
|
||||
await self.send_alert(
|
||||
message=message,
|
||||
level="Low",
|
||||
alert_type="daily_reports",
|
||||
alerting_metadata={},
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -582,6 +592,7 @@ class SlackAlerting(CustomLogger):
|
|||
await asyncio.sleep(
|
||||
self.alerting_threshold
|
||||
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
|
||||
alerting_metadata: dict = {}
|
||||
if (
|
||||
request_data is not None
|
||||
and request_data.get("litellm_status", "") != "success"
|
||||
|
@ -606,7 +617,7 @@ class SlackAlerting(CustomLogger):
|
|||
):
|
||||
# In hanging requests, sometimes it has not made it to the point where the deployment is passed to the `request_data`
|
||||
# in that case we fallback to the api base set in the request metadata
|
||||
_metadata = request_data["metadata"]
|
||||
_metadata: dict = request_data["metadata"]
|
||||
_api_base = _metadata.get("api_base", "")
|
||||
|
||||
request_info = litellm.utils._add_key_name_and_team_to_alert(
|
||||
|
@ -615,6 +626,9 @@ class SlackAlerting(CustomLogger):
|
|||
|
||||
if _api_base is None:
|
||||
_api_base = ""
|
||||
|
||||
if "alerting_metadata" in _metadata:
|
||||
alerting_metadata = _metadata["alerting_metadata"]
|
||||
request_info += f"\nAPI Base: `{_api_base}`"
|
||||
# only alert hanging responses if they have not been marked as success
|
||||
alerting_message = (
|
||||
|
@ -640,6 +654,7 @@ class SlackAlerting(CustomLogger):
|
|||
message=alerting_message + request_info,
|
||||
level="Medium",
|
||||
alert_type="llm_requests_hanging",
|
||||
alerting_metadata=alerting_metadata,
|
||||
)
|
||||
|
||||
async def failed_tracking_alert(self, error_message: str):
|
||||
|
@ -650,7 +665,10 @@ class SlackAlerting(CustomLogger):
|
|||
result = await _cache.async_get_cache(key=_cache_key)
|
||||
if result is None:
|
||||
await self.send_alert(
|
||||
message=message, level="High", alert_type="budget_alerts"
|
||||
message=message,
|
||||
level="High",
|
||||
alert_type="budget_alerts",
|
||||
alerting_metadata={},
|
||||
)
|
||||
await _cache.async_set_cache(
|
||||
key=_cache_key,
|
||||
|
@ -751,6 +769,7 @@ class SlackAlerting(CustomLogger):
|
|||
level="High",
|
||||
alert_type="budget_alerts",
|
||||
user_info=webhook_event,
|
||||
alerting_metadata={},
|
||||
)
|
||||
await _cache.async_set_cache(
|
||||
key=_cache_key,
|
||||
|
@ -941,7 +960,10 @@ class SlackAlerting(CustomLogger):
|
|||
)
|
||||
# send minor alert
|
||||
await self.send_alert(
|
||||
message=msg, level="Medium", alert_type="outage_alerts"
|
||||
message=msg,
|
||||
level="Medium",
|
||||
alert_type="outage_alerts",
|
||||
alerting_metadata={},
|
||||
)
|
||||
# set to true
|
||||
outage_value["minor_alert_sent"] = True
|
||||
|
@ -963,7 +985,12 @@ class SlackAlerting(CustomLogger):
|
|||
)
|
||||
|
||||
# send major alert
|
||||
await self.send_alert(message=msg, level="High", alert_type="outage_alerts")
|
||||
await self.send_alert(
|
||||
message=msg,
|
||||
level="High",
|
||||
alert_type="outage_alerts",
|
||||
alerting_metadata={},
|
||||
)
|
||||
# set to true
|
||||
outage_value["major_alert_sent"] = True
|
||||
|
||||
|
@ -1062,7 +1089,10 @@ class SlackAlerting(CustomLogger):
|
|||
)
|
||||
# send minor alert
|
||||
await self.send_alert(
|
||||
message=msg, level="Medium", alert_type="outage_alerts"
|
||||
message=msg,
|
||||
level="Medium",
|
||||
alert_type="outage_alerts",
|
||||
alerting_metadata={},
|
||||
)
|
||||
# set to true
|
||||
outage_value["minor_alert_sent"] = True
|
||||
|
@ -1081,7 +1111,10 @@ class SlackAlerting(CustomLogger):
|
|||
)
|
||||
# send major alert
|
||||
await self.send_alert(
|
||||
message=msg, level="High", alert_type="outage_alerts"
|
||||
message=msg,
|
||||
level="High",
|
||||
alert_type="outage_alerts",
|
||||
alerting_metadata={},
|
||||
)
|
||||
# set to true
|
||||
outage_value["major_alert_sent"] = True
|
||||
|
@ -1143,7 +1176,10 @@ Model Info:
|
|||
"""
|
||||
|
||||
alert_val = self.send_alert(
|
||||
message=message, level="Low", alert_type="new_model_added"
|
||||
message=message,
|
||||
level="Low",
|
||||
alert_type="new_model_added",
|
||||
alerting_metadata={},
|
||||
)
|
||||
|
||||
if alert_val is not None and asyncio.iscoroutine(alert_val):
|
||||
|
@ -1368,6 +1404,7 @@ Model Info:
|
|||
message: str,
|
||||
level: Literal["Low", "Medium", "High"],
|
||||
alert_type: Literal[AlertType],
|
||||
alerting_metadata: dict,
|
||||
user_info: Optional[WebhookEvent] = None,
|
||||
**kwargs,
|
||||
):
|
||||
|
@ -1425,6 +1462,9 @@ Model Info:
|
|||
if kwargs:
|
||||
for key, value in kwargs.items():
|
||||
formatted_message += f"\n\n{key}: `{value}`\n\n"
|
||||
if alerting_metadata:
|
||||
for key, value in alerting_metadata.items():
|
||||
formatted_message += f"\n\n*Alerting Metadata*: \n{key}: `{value}`\n\n"
|
||||
if _proxy_base_url is not None:
|
||||
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
|
||||
|
||||
|
@ -1622,6 +1662,7 @@ Model Info:
|
|||
message=_weekly_spend_message,
|
||||
level="Low",
|
||||
alert_type="spend_reports",
|
||||
alerting_metadata={},
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error("Error sending weekly spend report", e)
|
||||
|
@ -1673,6 +1714,7 @@ Model Info:
|
|||
message=_spend_message,
|
||||
level="Low",
|
||||
alert_type="spend_reports",
|
||||
alerting_metadata={},
|
||||
)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error("Error sending weekly spend report", e)
|
||||
|
|
|
@ -79,8 +79,8 @@ litellm_settings:
|
|||
failure_callback: ["langfuse"]
|
||||
cache: true
|
||||
|
||||
# general_settings:
|
||||
# alerting: ["email"]
|
||||
general_settings:
|
||||
alerting: ["slack"]
|
||||
# key_management_system: "aws_kms"
|
||||
# key_management_settings:
|
||||
# hosted_keys: ["LITELLM_MASTER_KEY"]
|
||||
|
|
|
@ -455,6 +455,7 @@ class ProxyLogging:
|
|||
formatted_message += f"\n\nProxy URL: `{_proxy_base_url}`"
|
||||
|
||||
extra_kwargs = {}
|
||||
alerting_metadata = {}
|
||||
if request_data is not None:
|
||||
_url = self.slack_alerting_instance._add_langfuse_trace_id_to_alert(
|
||||
request_data=request_data
|
||||
|
@ -462,7 +463,12 @@ class ProxyLogging:
|
|||
if _url is not None:
|
||||
extra_kwargs["🪢 Langfuse Trace"] = _url
|
||||
formatted_message += "\n\n🪢 Langfuse Trace: {}".format(_url)
|
||||
|
||||
if (
|
||||
"metadata" in request_data
|
||||
and request_data["metadata"].get("alerting_metadata", None) is not None
|
||||
and isinstance(request_data["metadata"]["alerting_metadata"], dict)
|
||||
):
|
||||
alerting_metadata = request_data["metadata"]["alerting_metadata"]
|
||||
for client in self.alerting:
|
||||
if client == "slack":
|
||||
await self.slack_alerting_instance.send_alert(
|
||||
|
@ -470,6 +476,7 @@ class ProxyLogging:
|
|||
level=level,
|
||||
alert_type=alert_type,
|
||||
user_info=None,
|
||||
alerting_metadata=alerting_metadata,
|
||||
**extra_kwargs,
|
||||
)
|
||||
elif client == "sentry":
|
||||
|
@ -510,7 +517,7 @@ class ProxyLogging:
|
|||
)
|
||||
|
||||
if hasattr(self, "service_logging_obj"):
|
||||
self.service_logging_obj.async_service_failure_hook(
|
||||
await self.service_logging_obj.async_service_failure_hook(
|
||||
service=ServiceTypes.DB,
|
||||
duration=duration,
|
||||
error=error_message,
|
||||
|
|
|
@ -162,6 +162,29 @@ async def test_response_taking_too_long_callback(slack_alerting):
|
|||
mock_send_alert.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_alerting_metadata(slack_alerting):
|
||||
"""
|
||||
Test that alerting_metadata is propagated correctly when a response takes too long
|
||||
"""
|
||||
start_time = datetime.now()
|
||||
end_time = start_time + timedelta(seconds=301)
|
||||
kwargs = {
|
||||
"model": "test_model",
|
||||
"messages": "test_messages",
|
||||
"litellm_params": {"metadata": {"alerting_metadata": {"hello": "world"}}},
|
||||
}
|
||||
with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
|
||||
|
||||
## RESPONSE TAKING TOO LONG
|
||||
await slack_alerting.response_taking_too_long_callback(
|
||||
kwargs, None, start_time, end_time
|
||||
)
|
||||
mock_send_alert.assert_awaited_once()
|
||||
|
||||
assert "hello" in mock_send_alert.call_args[1]["alerting_metadata"]
|
||||
|
||||
|
||||
# Test for budget crossed
|
||||
@pytest.mark.asyncio
|
||||
async def test_budget_alerts_crossed(slack_alerting):
|
||||
|
@ -207,7 +230,9 @@ async def test_send_alert(slack_alerting):
|
|||
slack_alerting.async_http_handler, "post", new=AsyncMock()
|
||||
) as mock_post:
|
||||
mock_post.return_value.status_code = 200
|
||||
await slack_alerting.send_alert("Test message", "Low", "budget_alerts")
|
||||
await slack_alerting.send_alert(
|
||||
"Test message", "Low", "budget_alerts", alerting_metadata={}
|
||||
)
|
||||
mock_post.assert_awaited_once()
|
||||
|
||||
|
||||
|
@ -266,7 +291,7 @@ async def test_daily_reports_completion(slack_alerting):
|
|||
await asyncio.sleep(3)
|
||||
response_val = await slack_alerting.send_daily_reports(router=router)
|
||||
|
||||
assert response_val == True
|
||||
assert response_val is True
|
||||
|
||||
mock_send_alert.assert_awaited_once()
|
||||
|
||||
|
@ -291,7 +316,7 @@ async def test_daily_reports_completion(slack_alerting):
|
|||
await asyncio.sleep(3)
|
||||
response_val = await slack_alerting.send_daily_reports(router=router)
|
||||
|
||||
assert response_val == True
|
||||
assert response_val is True
|
||||
|
||||
mock_send_alert.assert_awaited()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue