From d1e519afd14f045609cf64db0c28afd6788dc568 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Wed, 7 Aug 2024 10:40:55 -0700
Subject: [PATCH] use router_cooldown_handler

---
 litellm/router.py                          | 49 +++----------------
 litellm/router_utils/cooldown_callbacks.py | 51 +++++++++++++++++++++
 2 files changed, 60 insertions(+), 40 deletions(-)
 create mode 100644 litellm/router_utils/cooldown_callbacks.py

diff --git a/litellm/router.py b/litellm/router.py
index aa9768ba4..a6ec01b06 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -57,6 +57,7 @@ from litellm.router_utils.client_initalization_utils import (
     set_client,
     should_initialize_sync_client,
 )
+from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
 from litellm.router_utils.handle_error import send_llm_exception_alert
 from litellm.scheduler import FlowItem, Scheduler
 from litellm.types.llms.openai import (
@@ -3294,10 +3295,14 @@ class Router:
                     value=cached_value, key=cooldown_key, ttl=cooldown_time
                 )
 
-                self.send_deployment_cooldown_alert(
-                    deployment_id=deployment,
-                    exception_status=exception_status,
-                    cooldown_time=cooldown_time,
+                # Trigger cooldown handler
+                asyncio.create_task(
+                    router_cooldown_handler(
+                        litellm_router_instance=self,
+                        deployment_id=deployment,
+                        exception_status=exception_status,
+                        cooldown_time=cooldown_time,
+                    )
                 )
             else:
                 self.failed_calls.set_cache(
@@ -4948,42 +4953,6 @@ class Router:
             )
         print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n")  # noqa
 
-    def send_deployment_cooldown_alert(
-        self,
-        deployment_id: str,
-        exception_status: Union[str, int],
-        cooldown_time: float,
-    ):
-        try:
-            from litellm.proxy.proxy_server import proxy_logging_obj
-
-            # trigger slack alert saying deployment is in cooldown
-            if (
-                proxy_logging_obj is not None
-                and proxy_logging_obj.alerting is not None
-                and "slack" in proxy_logging_obj.alerting
-            ):
-                _deployment = self.get_deployment(model_id=deployment_id)
-                if _deployment is None:
-                    return
-
-                _litellm_params = _deployment["litellm_params"]
-                temp_litellm_params = copy.deepcopy(_litellm_params)
-                temp_litellm_params = dict(temp_litellm_params)
-                _model_name = _deployment.get("model_name", None)
-                _api_base = litellm.get_api_base(
-                    model=_model_name, optional_params=temp_litellm_params
-                )
-                # asyncio.create_task(
-                #     proxy_logging_obj.slack_alerting_instance.send_alert(
-                #         message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
-                #         alert_type="cooldown_deployment",
-                #         level="Low",
-                #     )
-                # )
-        except Exception as e:
-            pass
-
     def set_custom_routing_strategy(
         self, CustomRoutingStrategy: CustomRoutingStrategyBase
     ):
diff --git a/litellm/router_utils/cooldown_callbacks.py b/litellm/router_utils/cooldown_callbacks.py
new file mode 100644
index 000000000..00e89274b
--- /dev/null
+++ b/litellm/router_utils/cooldown_callbacks.py
@@ -0,0 +1,51 @@
+"""
+Callbacks triggered on cooling down deployments
+"""
+
+import copy
+from typing import TYPE_CHECKING, Any, Union
+
+import litellm
+from litellm._logging import verbose_logger
+
+if TYPE_CHECKING:
+    from litellm.router import Router as _Router
+
+    LitellmRouter = _Router
+else:
+    LitellmRouter = Any
+
+
+async def router_cooldown_handler(
+    litellm_router_instance: LitellmRouter,
+    deployment_id: str,
+    exception_status: Union[str, int],
+    cooldown_time: float,
+):
+    _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
+    if _deployment is None:
+        verbose_logger.warning(
+            f"router_cooldown_handler: no deployment found for deployment_id={deployment_id}, skipping cooldown callbacks"
+        )
+        return
+    _litellm_params = _deployment["litellm_params"]
+    temp_litellm_params = copy.deepcopy(_litellm_params)
+    temp_litellm_params = dict(temp_litellm_params)
+    _model_name = _deployment.get("model_name", None)
+    _api_base = litellm.get_api_base(
+        model=_model_name, optional_params=temp_litellm_params
+    )
+    model_info = _deployment["model_info"]
+    model_id = model_info.id
+
+    # Trigger cooldown metric on Prometheus
+    # (imported here, not at module level, to avoid an import cycle)
+    from litellm.litellm_core_utils.litellm_logging import prometheusLogger
+
+    if prometheusLogger is not None:
+        prometheusLogger.set_llm_outage_metric(
+            litellm_model_name=_model_name or "",
+            model_id=model_id,
+            api_base=_api_base or "",
+            api_provider="",
+        )
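
Editor's note (appended, not part of the patch): router_cooldown_handler is
fire-and-forget: the router schedules it with asyncio.create_task() so the
cooldown callbacks never block the request path. The sketch below shows one
way to exercise the handler directly. It is a minimal sketch, not part of
this change: the model name and api_key are placeholders, and the model id
lookup via router.model_list[0]["model_info"]["id"] assumes the id the
Router auto-generates at init is reachable there.

    import asyncio

    from litellm import Router
    from litellm.router_utils.cooldown_callbacks import router_cooldown_handler

    # Placeholder deployment; swap in a real provider/key before running.
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": "sk-placeholder",
                },
            }
        ]
    )

    # Grab the auto-generated deployment id and run the cooldown callbacks
    # once, the same call the router makes when it cools a deployment down.
    model_id = router.model_list[0]["model_info"]["id"]

    asyncio.run(
        router_cooldown_handler(
            litellm_router_instance=router,
            deployment_id=model_id,
            exception_status=429,
            cooldown_time=60.0,
        )
    )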