unify sync and async logic for retries

This commit is contained in:
Ishaan Jaff 2024-05-11 18:17:04 -07:00
parent 06e0c4c171
commit a3b4074c22

View file

@ -1766,23 +1766,31 @@ class Router:
except Exception as e:
original_exception = e
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
if (
isinstance(original_exception, litellm.ContextWindowExceededError)
and context_window_fallbacks is not None
) or (
isinstance(original_exception, openai.RateLimitError)
and fallbacks is not None
):
raise original_exception
## LOGGING
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
### RETRY
_, _healthy_deployments = self._common_checks_available_deployment(
model=kwargs.get("model"),
)
# raises an exception if this error should not be retries
self.should_retry_this_error(
error=e,
healthy_deployments=_healthy_deployments,
fallbacks=fallbacks,
context_window_fallbacks=context_window_fallbacks,
)
# decides how long to sleep before retry
_timeout = self._time_to_sleep_before_retry(
e=original_exception,
remaining_retries=num_retries,
num_retries=num_retries,
_healthy_deployments=_healthy_deployments,
fallbacks=fallbacks,
)
## LOGGING
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
time.sleep(_timeout)
for current_attempt in range(num_retries):
verbose_router_logger.debug(
@ -1796,11 +1804,16 @@ class Router:
except Exception as e:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
_, _healthy_deployments = self._common_checks_available_deployment(
model=kwargs.get("model"),
)
remaining_retries = num_retries - current_attempt
_timeout = self._time_to_sleep_before_retry(
e=e,
remaining_retries=remaining_retries,
num_retries=num_retries,
healthy_deployments=_healthy_deployments,
fallbacks=fallbacks,
)
time.sleep(_timeout)
raise original_exception