Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
Improve Proxy Resiliency: Cooldown single-deployment model groups if 100% of calls failed in high traffic (#7823)
* refactor(_is_cooldown_required): move '_is_cooldown_required' into cooldown_handlers.py
* refactor(cooldown_handlers.py): move cooldown constants into `constants.py`
* fix(cooldown_handlers.py): remove the "if single deployment, don't cooldown" logic and move it into the traffic-based cooldown logic. Addresses https://github.com/BerriAI/litellm/issues/7822
* fix: add unit tests for '_should_cooldown_deployment'
* test: ensure all tests pass
* test: update test
* fix(cooldown_handlers.py): don't cooldown single-deployment models for anything besides traffic-related errors
* fix(cooldown_handlers.py): fix cooldown handler logic
* fix(cooldown_handlers.py): fix check
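The behavioral change worth calling out: previously a model group with a single deployment was never cooled down; after this commit it can be, but only for traffic-related failures. Below is a minimal sketch of that rule, with hypothetical names (`should_cooldown_single_deployment`, `MIN_TRAFFIC_FOR_COOLDOWN`) standing in for the real `_should_cooldown_deployment` logic, not litellm's actual implementation:

```python
# Hypothetical sketch of the traffic-based cooldown rule described in the
# commit message above; NOT litellm's actual implementation. The constant
# and function names here are illustrative only.

MIN_TRAFFIC_FOR_COOLDOWN = 10  # assumed threshold: need enough calls to judge a failure rate


def should_cooldown_single_deployment(num_successes: int, num_failures: int) -> bool:
    """Cool down a single-deployment model group only when every recent call
    failed under meaningful traffic (the "100% of calls failed" rule)."""
    total = num_successes + num_failures
    if total < MIN_TRAFFIC_FOR_COOLDOWN:
        # Low traffic: never cool down the group's only deployment, since
        # doing so would take the whole model group offline.
        return False
    return num_failures == total  # cool down only if 100% of calls failed
```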
Parent: d00febcdaa
Commit: 80f7af510b
5 changed files with 220 additions and 73 deletions
```diff
@@ -2190,6 +2190,8 @@ def test_router_context_window_pre_call_check(model, base_model, llm_provider):
 
 
 def test_router_cooldown_api_connection_error():
+    from litellm.router_utils.cooldown_handlers import _is_cooldown_required
+
     try:
         _ = litellm.completion(
             model="vertex_ai/gemini-1.5-pro",
@@ -2197,8 +2199,11 @@ def test_router_cooldown_api_connection_error():
         )
     except litellm.APIConnectionError as e:
         assert (
-            Router()._is_cooldown_required(
-                model_id="", exception_status=e.code, exception_str=str(e)
+            _is_cooldown_required(
+                litellm_router_instance=Router(),
+                model_id="",
+                exception_status=e.code,
+                exception_str=str(e),
             )
             is False
         )
```
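Net effect on callers: the cooldown check moved from a private `Router` method to a module-level helper in `cooldown_handlers.py` that takes the router instance explicitly. A minimal before/after sketch, with keyword arguments taken from the diff above (the status code and error string here are illustrative values):

```python
from litellm import Router
from litellm.router_utils.cooldown_handlers import _is_cooldown_required

router = Router()  # constructed with no arguments, as in the test above

# Before this commit: method on the Router instance
# router._is_cooldown_required(
#     model_id="", exception_status=500, exception_str="connection error"
# )

# After this commit: module-level helper, router passed explicitly
cooldown_needed = _is_cooldown_required(
    litellm_router_instance=router,
    model_id="",
    exception_status=500,
    exception_str="connection error",
)
```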