mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
feat(slack_alerting.py): refactor region outage alerting to do model based alerting instead
We are currently unable to extract the Azure region from the API base, so it makes sense to start with model-based alerting and move to region-based alerting later.
This commit is contained in:
parent
a8fb4e33d5
commit
4536ed6f6e
7 changed files with 119 additions and 51 deletions
|
@ -3876,13 +3876,13 @@ class Router:
|
|||
_api_base = litellm.get_api_base(
|
||||
model=_model_name, optional_params=temp_litellm_params
|
||||
)
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.slack_alerting_instance.send_alert(
|
||||
message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
|
||||
alert_type="cooldown_deployment",
|
||||
level="Low",
|
||||
)
|
||||
)
|
||||
# asyncio.create_task(
|
||||
# proxy_logging_obj.slack_alerting_instance.send_alert(
|
||||
# message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
|
||||
# alert_type="cooldown_deployment",
|
||||
# level="Low",
|
||||
# )
|
||||
# )
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue