Improve Proxy Resiliency: Cooldown single-deployment model groups if 100% of calls failed in high traffic (#7823)

* refactor(_is_cooldown_required): move '_is_cooldown_required' into cooldown_handlers.py

* refactor(cooldown_handlers.py): move cooldown constants into `constants.py`

* fix(cooldown_handlers.py): remove the "don't cooldown if single deployment" logic

move to traffic-based cooldown logic

Addresses https://github.com/BerriAI/litellm/issues/7822
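The traffic-based rule, roughly: a single-deployment model group only enters cooldown when the current window saw enough traffic and every call in it failed. A minimal standalone sketch of that idea (the function name, threshold, and window shape here are illustrative assumptions, not litellm's actual implementation):

```python
# Sketch only -- names and thresholds are assumptions, not litellm's actual code.
from dataclasses import dataclass


@dataclass
class CallWindow:
    total_calls: int
    failed_calls: int


MIN_CALLS_FOR_COOLDOWN = 10  # hypothetical "high traffic" floor


def should_cooldown_single_deployment(window: CallWindow) -> bool:
    """Cool down a single-deployment model group only when a busy window
    failed 100% of its calls (the rule described in #7823)."""
    if window.total_calls < MIN_CALLS_FOR_COOLDOWN:
        # Low traffic: a few failures shouldn't take the only deployment offline.
        return False
    return window.failed_calls == window.total_calls
```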

* fix: add unit tests for '_should_cooldown_deployment'

* test: ensure all tests pass

* test: update test

* fix(cooldown_handlers.py): don't cooldown single-deployment models for anything besides traffic-related errors

* fix(cooldown_handlers.py): fix cooldown handler logic

* fix(cooldown_handlers.py): fix check
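Taken together, these fixes amount to a filter like the sketch below for single-deployment model groups: cooldown only on traffic-related errors, never on ordinary failures. The exact set of status codes treated as "traffic related" here is an assumption for illustration:

```python
# Illustrative sketch; the exact set of "traffic related" errors is an assumption.
TRAFFIC_RELATED_STATUS_CODES = {408, 429}  # request timeout, too many requests


def should_cooldown(num_deployments_in_group: int, exception_status: int) -> bool:
    if num_deployments_in_group == 1:
        # A lone deployment only cools down for traffic-related errors; otherwise
        # the whole model group would go dark on a single bad request.
        return exception_status in TRAFFIC_RELATED_STATUS_CODES
    # Groups with alternative deployments keep the normal cooldown behavior.
    return True
```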
Krish Dholakia 2025-01-17 20:17:02 -08:00 committed by GitHub
parent d00febcdaa
commit 80f7af510b
5 changed files with 220 additions and 73 deletions


@@ -2190,6 +2190,8 @@ def test_router_context_window_pre_call_check(model, base_model, llm_provider):
 
 
 def test_router_cooldown_api_connection_error():
+    from litellm.router_utils.cooldown_handlers import _is_cooldown_required
+
     try:
         _ = litellm.completion(
             model="vertex_ai/gemini-1.5-pro",
@@ -2197,8 +2199,11 @@ def test_router_cooldown_api_connection_error():
         )
     except litellm.APIConnectionError as e:
         assert (
-            Router()._is_cooldown_required(
-                model_id="", exception_status=e.code, exception_str=str(e)
+            _is_cooldown_required(
+                litellm_router_instance=Router(),
+                model_id="",
+                exception_status=e.code,
+                exception_str=str(e),
             )
             is False
        )
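After the refactor, callers import the helper from `litellm.router_utils.cooldown_handlers` and pass the router explicitly instead of calling a `Router` method, as the updated test above does. A minimal usage sketch (the 429 status and message are placeholder inputs):

```python
from litellm import Router
from litellm.router_utils.cooldown_handlers import _is_cooldown_required

# _is_cooldown_required is now a module-level function taking the router as an argument.
needs_cooldown = _is_cooldown_required(
    litellm_router_instance=Router(),
    model_id="",
    exception_status=429,  # placeholder: a rate-limit (traffic-related) error
    exception_str="rate limited",
)
```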