use router_cooldown_handler

This commit is contained in:
Ishaan Jaff 2024-08-07 10:40:55 -07:00
parent 99ecde7744
commit d1e519afd1
2 changed files with 60 additions and 40 deletions

View file

@ -57,6 +57,7 @@ from litellm.router_utils.client_initalization_utils import (
set_client,
should_initialize_sync_client,
)
from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import (
@ -3294,10 +3295,14 @@ class Router:
value=cached_value, key=cooldown_key, ttl=cooldown_time
)
self.send_deployment_cooldown_alert(
deployment_id=deployment,
exception_status=exception_status,
cooldown_time=cooldown_time,
# Trigger cooldown handler
asyncio.create_task(
router_cooldown_handler(
litellm_router_instance=self,
deployment_id=deployment,
exception_status=exception_status,
cooldown_time=cooldown_time,
)
)
else:
self.failed_calls.set_cache(
@ -4948,42 +4953,6 @@ class Router:
)
print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n") # noqa
def send_deployment_cooldown_alert(
    self,
    deployment_id: str,
    exception_status: Union[str, int],
    cooldown_time: float,
) -> None:
    """Prepare the Slack "deployment is cooling down" alert for a deployment.

    Best-effort: any failure is logged at debug level and never propagated,
    so a broken alerting setup cannot interfere with the caller.

    Args:
        deployment_id: Id passed to ``self.get_deployment(model_id=...)``.
        exception_status: Status of the exception that triggered the
            cooldown; only referenced by the (currently disabled) alert text.
        cooldown_time: Cooldown duration; only referenced by the (currently
            disabled) alert text.
    """
    try:
        # Imported lazily inside the function, as in the original code.
        from litellm.proxy.proxy_server import proxy_logging_obj

        # Only proceed when slack alerting is configured on the proxy.
        if (
            proxy_logging_obj is None
            or proxy_logging_obj.alerting is None
            or "slack" not in proxy_logging_obj.alerting
        ):
            return

        _deployment = self.get_deployment(model_id=deployment_id)
        if _deployment is None:
            return

        # Work on a deep copy so get_api_base cannot mutate the deployment's
        # own litellm_params.
        temp_litellm_params = dict(copy.deepcopy(_deployment["litellm_params"]))
        _model_name = _deployment.get("model_name")
        _api_base = litellm.get_api_base(
            model=_model_name, optional_params=temp_litellm_params
        )

        # NOTE: the alert dispatch below is disabled (commented out), so the
        # values computed above are not sent anywhere by this method.
        # asyncio.create_task(
        #     proxy_logging_obj.slack_alerting_instance.send_alert(
        #         message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
        #         alert_type="cooldown_deployment",
        #         level="Low",
        #     )
        # )
    except Exception:
        # Alerting must never break routing: record the failure instead of
        # silently discarding it, then carry on.
        import logging

        logging.getLogger(__name__).debug(
            "Router: failed to prepare deployment cooldown alert for %s",
            deployment_id,
            exc_info=True,
        )
def set_custom_routing_strategy(
self, CustomRoutingStrategy: CustomRoutingStrategyBase
):