forked from phoenix/litellm-mirror
fix _time_to_sleep_before_retry logic
This commit is contained in:
parent
ed8a25c630
commit
3e6097d9f8
1 changed file with 27 additions and 6 deletions
|
@@ -1524,10 +1524,12 @@ class Router:
|
||||||
context_window_fallbacks=context_window_fallbacks,
|
context_window_fallbacks=context_window_fallbacks,
|
||||||
)
|
)
|
||||||
|
|
||||||
_timeout = self._router_should_retry(
|
_timeout = self._time_to_sleep_before_retry(
|
||||||
e=original_exception,
|
e=original_exception,
|
||||||
remaining_retries=num_retries,
|
remaining_retries=num_retries,
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
|
_healthy_deployments=_healthy_deployments,
|
||||||
|
fallbacks=fallbacks,
|
||||||
)
|
)
|
||||||
|
|
||||||
### RETRY
|
### RETRY
|
||||||
|
@@ -1564,7 +1566,7 @@ class Router:
|
||||||
## LOGGING
|
## LOGGING
|
||||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||||
remaining_retries = num_retries - current_attempt
|
remaining_retries = num_retries - current_attempt
|
||||||
_timeout = self._router_should_retry(
|
_timeout = self._time_to_sleep_before_retry(
|
||||||
e=original_exception,
|
e=original_exception,
|
||||||
remaining_retries=remaining_retries,
|
remaining_retries=remaining_retries,
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
|
@@ -1697,12 +1699,31 @@ class Router:
|
||||||
raise e
|
raise e
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
|
||||||
def _router_should_retry(
|
def _time_to_sleep_before_retry(
|
||||||
self, e: Exception, remaining_retries: int, num_retries: int
|
self,
|
||||||
|
e: Exception,
|
||||||
|
remaining_retries: int,
|
||||||
|
num_retries: int,
|
||||||
|
healthy_deployments: Optional[List] = None,
|
||||||
|
fallbacks: Optional[List] = None,
|
||||||
) -> Union[int, float]:
|
) -> Union[int, float]:
|
||||||
"""
|
"""
|
||||||
Calculate back-off, then retry
|
Calculate back-off, then retry
|
||||||
|
|
||||||
|
It should instantly retry only when:
|
||||||
|
1. there are healthy deployments in the same model group
|
||||||
|
2. there are fallbacks for the completion call
|
||||||
"""
|
"""
|
||||||
|
if (
|
||||||
|
healthy_deployments is not None
|
||||||
|
and isinstance(healthy_deployments, list)
|
||||||
|
and len(healthy_deployments) > 0
|
||||||
|
):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if fallbacks is not None and isinstance(fallbacks, list) and len(fallbacks) > 0:
|
||||||
|
return 0
|
||||||
|
|
||||||
if hasattr(e, "response") and hasattr(e.response, "headers"):
|
if hasattr(e, "response") and hasattr(e.response, "headers"):
|
||||||
timeout = litellm._calculate_retry_after(
|
timeout = litellm._calculate_retry_after(
|
||||||
remaining_retries=remaining_retries,
|
remaining_retries=remaining_retries,
|
||||||
|
@@ -1751,7 +1772,7 @@ class Router:
|
||||||
if num_retries > 0:
|
if num_retries > 0:
|
||||||
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
||||||
### RETRY
|
### RETRY
|
||||||
_timeout = self._router_should_retry(
|
_timeout = self._time_to_sleep_before_retry(
|
||||||
e=original_exception,
|
e=original_exception,
|
||||||
remaining_retries=num_retries,
|
remaining_retries=num_retries,
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
|
@@ -1770,7 +1791,7 @@ class Router:
|
||||||
## LOGGING
|
## LOGGING
|
||||||
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
kwargs = self.log_retry(kwargs=kwargs, e=e)
|
||||||
remaining_retries = num_retries - current_attempt
|
remaining_retries = num_retries - current_attempt
|
||||||
_timeout = self._router_should_retry(
|
_timeout = self._time_to_sleep_before_retry(
|
||||||
e=e,
|
e=e,
|
||||||
remaining_retries=remaining_retries,
|
remaining_retries=remaining_retries,
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue