Merge pull request #3532 from BerriAI/litellm_send_alert_on_cooling_down_deploymeny

[Feat] send alert on cooling down deployment
This commit is contained in:
Ishaan Jaff 2024-05-08 14:30:31 -07:00 committed by GitHub
commit ba08a82885
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 38 additions and 0 deletions

View file

@ -710,6 +710,7 @@ Model Info:
"db_exceptions",
"daily_reports",
"new_model_added",
"cooldown_deployment",
],
**kwargs,
):

View file

@ -1862,6 +1862,10 @@ class Router:
self.cache.set_cache(
value=cached_value, key=cooldown_key, ttl=cooldown_time
)
self.send_deployment_cooldown_alert(
deployment_id=deployment, exception_status=exception_status
)
else:
self.failed_calls.set_cache(
key=deployment, value=updated_fails, ttl=cooldown_time
@ -3384,6 +3388,39 @@ class Router:
)
print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n") # noqa
def send_deployment_cooldown_alert(
    self, deployment_id: str, exception_status: Union[str, int]
):
    """Fire a 'cooldown_deployment' Slack alert for a cooled-down deployment.

    Best-effort by design: any failure (proxy server not running, alerting
    not configured, task-scheduling errors) is swallowed so that alerting
    can never break the routing / cooldown path that calls this.

    Args:
        deployment_id: model id of the deployment being put in cooldown.
        exception_status: status code or message of the exception that
            triggered the cooldown; embedded in the alert text.
    """
    try:
        from litellm.proxy.proxy_server import proxy_logging_obj

        # Only alert when the proxy is up and Slack alerting is enabled.
        if (
            proxy_logging_obj is None
            or proxy_logging_obj.alerting is None
            or "slack" not in proxy_logging_obj.alerting
        ):
            return

        _deployment = self.get_deployment(model_id=deployment_id)
        if _deployment is None:
            return

        # Work on a copy so get_api_base cannot mutate router state.
        _litellm_params = dict(copy.deepcopy(_deployment["litellm_params"]))
        _model_name = _deployment.get("model_name", None)
        _api_base = litellm.get_api_base(
            model=_model_name, optional_params=_litellm_params
        )
        # Fire-and-forget: don't block the cooldown path on Slack I/O.
        asyncio.create_task(
            proxy_logging_obj.slack_alerting_instance.send_alert(
                message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}",
                alert_type="cooldown_deployment",
                level="Low",
            )
        )
    except Exception:
        # Deliberate best-effort swallow: an alerting failure must never
        # surface to the caller that is cooling down the deployment.
        pass
def flush_cache(self):
    """Reset the global litellm cache, then wipe this router's own cache."""
    # Drop the module-level cache reference first.
    litellm.cache = None
    # Then flush the router-local cache instance.
    router_cache = self.cache
    router_cache.flush_cache()