forked from phoenix/litellm-mirror
feat - router use _is_cooldown_required
This commit is contained in:
parent
ae80148c12
commit
f17f0a09d8
1 changed file with 40 additions and 3 deletions
|
@ -1962,6 +1962,45 @@ class Router:
|
||||||
key=rpm_key, value=request_count, local_only=True
|
key=rpm_key, value=request_count, local_only=True
|
||||||
) # don't change existing ttl
|
) # don't change existing ttl
|
||||||
|
|
||||||
|
def _is_cooldown_required(self, exception_status: Union[str, int]):
|
||||||
|
"""
|
||||||
|
A function to determine if a cooldown is required based on the exception status.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
exception_status (Union[str, int]): The status of the exception.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if a cooldown is required, False otherwise.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
|
||||||
|
if isinstance(exception_status, str):
|
||||||
|
exception_status = int(exception_status)
|
||||||
|
|
||||||
|
if exception_status >= 400 and exception_status < 500:
|
||||||
|
if exception_status == 429:
|
||||||
|
# Cool down 429 Rate Limit Errors
|
||||||
|
return True
|
||||||
|
|
||||||
|
elif exception_status == 401:
|
||||||
|
# Cool down 401 Auth Errors
|
||||||
|
return True
|
||||||
|
|
||||||
|
elif exception_status == 408:
|
||||||
|
return True
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Do NOT cool down all other 4XX Errors
|
||||||
|
return False
|
||||||
|
|
||||||
|
else:
|
||||||
|
# should cool down for all other errors
|
||||||
|
return True
|
||||||
|
|
||||||
|
except:
|
||||||
|
# Catch all - if any exceptions default to cooling down
|
||||||
|
return True
|
||||||
|
|
||||||
def _set_cooldown_deployments(
|
def _set_cooldown_deployments(
|
||||||
self, exception_status: Union[str, int], deployment: Optional[str] = None
|
self, exception_status: Union[str, int], deployment: Optional[str] = None
|
||||||
):
|
):
|
||||||
|
@ -1975,9 +2014,7 @@ class Router:
|
||||||
if deployment is None:
|
if deployment is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
if exception_status == 400 or exception_status == "400":
|
if self._is_cooldown_required(exception_status=exception_status) == False:
|
||||||
# the user sent a BadRequest -> the Deployment is not unhealthy here
|
|
||||||
# Do NOT Cooldown
|
|
||||||
return
|
return
|
||||||
|
|
||||||
dt = get_utc_datetime()
|
dt = get_utc_datetime()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue