diff --git a/docs/my-website/docs/proxy/alerting.md b/docs/my-website/docs/proxy/alerting.md index 08030f478..b8e5ebe20 100644 --- a/docs/my-website/docs/proxy/alerting.md +++ b/docs/my-website/docs/proxy/alerting.md @@ -119,8 +119,8 @@ All Possible Alert Types ```python AlertType = Literal[ - "llm_exceptions", - "llm_too_slow", + "llm_exceptions", # LLM API Exceptions + "llm_too_slow", # LLM Responses slower than alerting_threshold "llm_requests_hanging", "budget_alerts", "db_exceptions", @@ -133,6 +133,61 @@ AlertType = Literal[ ``` +## Advanced - set specific slack channels per alert type + +Use this if you want to set specific channels per alert type + +**This allows you to do the following** +``` +llm_exceptions -> go to slack channel #llm-exceptions +spend_reports -> go to slack channel #llm-spend-reports +``` + +Set `alert_to_webhook_url` in your config.yaml + +```yaml +model_list: + - model_name: gpt-4 + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + +general_settings: + master_key: sk-1234 + alerting: ["slack"] + alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting + alert_to_webhook_url: { + "llm_exceptions": "https://hooks.slack.com/services/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", + "llm_too_slow": "https://hooks.slack.com/services/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", + "llm_requests_hanging": "https://hooks.slack.com/services/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", + "budget_alerts": "https://hooks.slack.com/services/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", + "db_exceptions": "https://hooks.slack.com/services/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", + "daily_reports": "https://hooks.slack.com/services/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", + "spend_reports": "https://hooks.slack.com/services/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", + "cooldown_deployment": 
"https://hooks.slack.com/services/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", + "new_model_added": "https://hooks.slack.com/services/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", + "outage_alerts": "https://hooks.slack.com/services/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", + } + +litellm_settings: + success_callback: ["langfuse"] +``` + +Test it - send a valid llm request - expect to see a `llm_too_slow` alert in its own slack channel + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4", + "messages": [ + {"role": "user", "content": "Hello, Claude gm!"} + ] +}' +``` + ## Advanced - Using MS Teams Webhooks diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 60ddfba32..0e3f0826e 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -10,6 +10,12 @@ model_list: api_key: "os.environ/FIREWORKS" general_settings: master_key: sk-1234 + alerting: ["slack"] + alerting_threshold: 0.0001 + alert_to_webhook_url: { + "llm_too_slow": "https://hooks.slack.com/services/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", + "llm_requests_hanging": "https://hooks.slack.com/services/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", + } litellm_settings: success_callback: ["langfuse"] \ No newline at end of file diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 3ab864381..040348275 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1623,6 +1623,7 @@ class ProxyConfig: alerting=general_settings.get("alerting", None), alerting_threshold=general_settings.get("alerting_threshold", 600), alert_types=general_settings.get("alert_types", None), + alert_to_webhook_url=general_settings.get("alert_to_webhook_url", None), alerting_args=general_settings.get("alerting_args", None), redis_cache=redis_usage_cache, ) diff --git 
a/litellm/proxy/utils.py b/litellm/proxy/utils.py index a982c6cd7..5e693deef 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -202,6 +202,7 @@ class ProxyLogging: redis_cache: Optional[RedisCache] = None, alert_types: Optional[List[AlertType]] = None, alerting_args: Optional[dict] = None, + alert_to_webhook_url: Optional[dict] = None, ): updated_slack_alerting: bool = False if alerting is not None: @@ -213,6 +214,9 @@ class ProxyLogging: if alert_types is not None: self.alert_types = alert_types updated_slack_alerting = True + if alert_to_webhook_url is not None: + self.alert_to_webhook_url = alert_to_webhook_url + updated_slack_alerting = True if updated_slack_alerting is True: self.slack_alerting_instance.update_values( @@ -220,6 +224,7 @@ class ProxyLogging: alerting_threshold=self.alerting_threshold, alert_types=self.alert_types, alerting_args=alerting_args, + alert_to_webhook_url=self.alert_to_webhook_url, ) if (