mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Improve Proxy Resiliency: Cooldown single-deployment model groups if 100% calls failed in high traffic (#7823)
* refactor(_is_cooldown_required): move '_is_cooldown_required' into cooldown_handlers.py
* refactor(cooldown_handlers.py): move cooldown constants into `.constants.py`
* fix(cooldown_handlers.py): remove the "if single deployment, don't cooldown" logic and move to traffic-based cooldown logic. Addresses https://github.com/BerriAI/litellm/issues/7822
* fix: add unit tests for '_should_cooldown_deployment'
* test: ensure all tests pass
* test: update test
* fix(cooldown_handlers.py): don't cooldown single-deployment models for anything besides traffic-related errors
* fix(cooldown_handlers.py): fix cooldown handler logic
* fix(cooldown_handlers.py): fix check
This commit is contained in:
parent
d00febcdaa
commit
80f7af510b
5 changed files with 220 additions and 73 deletions
|
@ -3617,66 +3617,6 @@ class Router:
|
|||
|
||||
return request_count
|
||||
|
||||
def _is_cooldown_required(
    self,
    model_id: str,
    exception_status: Union[str, int],
    exception_str: Optional[str] = None,
) -> bool:
    """
    Decide whether a failed call should put the deployment on cooldown.

    Parameters:
        model_id (str): The id of the model in the model list.
        exception_status (Union[str, int]): The status of the exception.
            A string value is converted with ``int()`` before it is compared.
        exception_str (Optional[str]): Text of the exception. When it contains
            "APIConnectionError" no cooldown is requested.

    Returns:
        bool: True if a cooldown is required, False otherwise.
    """
    ## BASE CASE - a model group holding exactly one deployment is never cooled down
    deployments = self.get_model_group(id=model_id)
    if deployments is not None and len(deployments) == 1:
        return False

    try:
        # don't cooldown on litellm api connection errors
        if exception_str is not None and any(
            marker in exception_str for marker in ("APIConnectionError",)
        ):
            return False

        status_code = (
            int(exception_status)
            if isinstance(exception_status, str)
            else exception_status
        )

        if 400 <= status_code < 500:
            # Only 429 (rate limit), 401 (auth), 408 and 404 trigger a cooldown;
            # every other 4XX error does NOT.
            return status_code in (429, 401, 408, 404)

        # should cool down for all other errors
        return True
    except Exception:
        # Catch all - if any exceptions default to cooling down
        return True
|
||||
|
||||
def _has_default_fallbacks(self) -> bool:
|
||||
if self.fallbacks is None:
|
||||
return False
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue