unify sync and async logic for retries

This commit is contained in:
Ishaan Jaff 2024-05-11 18:17:04 -07:00
parent 06e0c4c171
commit a3b4074c22

View file

@ -1766,23 +1766,31 @@ class Router:
except Exception as e: except Exception as e:
original_exception = e original_exception = e
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
if ( _, _healthy_deployments = self._common_checks_available_deployment(
isinstance(original_exception, litellm.ContextWindowExceededError) model=kwargs.get("model"),
and context_window_fallbacks is not None )
) or (
isinstance(original_exception, openai.RateLimitError) # raises an exception if this error should not be retried
and fallbacks is not None self.should_retry_this_error(
): error=e,
raise original_exception healthy_deployments=_healthy_deployments,
## LOGGING fallbacks=fallbacks,
if num_retries > 0: context_window_fallbacks=context_window_fallbacks,
kwargs = self.log_retry(kwargs=kwargs, e=original_exception) )
### RETRY
# decides how long to sleep before retry
_timeout = self._time_to_sleep_before_retry( _timeout = self._time_to_sleep_before_retry(
e=original_exception, e=original_exception,
remaining_retries=num_retries, remaining_retries=num_retries,
num_retries=num_retries, num_retries=num_retries,
_healthy_deployments=_healthy_deployments,
fallbacks=fallbacks,
) )
## LOGGING
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
time.sleep(_timeout) time.sleep(_timeout)
for current_attempt in range(num_retries): for current_attempt in range(num_retries):
verbose_router_logger.debug( verbose_router_logger.debug(
@ -1796,11 +1804,16 @@ class Router:
except Exception as e: except Exception as e:
## LOGGING ## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e) kwargs = self.log_retry(kwargs=kwargs, e=e)
_, _healthy_deployments = self._common_checks_available_deployment(
model=kwargs.get("model"),
)
remaining_retries = num_retries - current_attempt remaining_retries = num_retries - current_attempt
_timeout = self._time_to_sleep_before_retry( _timeout = self._time_to_sleep_before_retry(
e=e, e=e,
remaining_retries=remaining_retries, remaining_retries=remaining_retries,
num_retries=num_retries, num_retries=num_retries,
healthy_deployments=_healthy_deployments,
fallbacks=fallbacks,
) )
time.sleep(_timeout) time.sleep(_timeout)
raise original_exception raise original_exception