diff --git a/litellm/router.py b/litellm/router.py index c0cc6dfba1..23000d9575 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -1766,23 +1766,31 @@ class Router: except Exception as e: original_exception = e ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR - if ( - isinstance(original_exception, litellm.ContextWindowExceededError) - and context_window_fallbacks is not None - ) or ( - isinstance(original_exception, openai.RateLimitError) - and fallbacks is not None - ): - raise original_exception - ## LOGGING - if num_retries > 0: - kwargs = self.log_retry(kwargs=kwargs, e=original_exception) - ### RETRY + _, _healthy_deployments = self._common_checks_available_deployment( + model=kwargs.get("model"), + ) + + # raises an exception if this error should not be retries + self.should_retry_this_error( + error=e, + healthy_deployments=_healthy_deployments, + fallbacks=fallbacks, + context_window_fallbacks=context_window_fallbacks, + ) + + # decides how long to sleep before retry _timeout = self._time_to_sleep_before_retry( e=original_exception, remaining_retries=num_retries, num_retries=num_retries, + _healthy_deployments=_healthy_deployments, + fallbacks=fallbacks, ) + + ## LOGGING + if num_retries > 0: + kwargs = self.log_retry(kwargs=kwargs, e=original_exception) + time.sleep(_timeout) for current_attempt in range(num_retries): verbose_router_logger.debug( @@ -1796,11 +1804,16 @@ class Router: except Exception as e: ## LOGGING kwargs = self.log_retry(kwargs=kwargs, e=e) + _, _healthy_deployments = self._common_checks_available_deployment( + model=kwargs.get("model"), + ) remaining_retries = num_retries - current_attempt _timeout = self._time_to_sleep_before_retry( e=e, remaining_retries=remaining_retries, num_retries=num_retries, + healthy_deployments=_healthy_deployments, + fallbacks=fallbacks, ) time.sleep(_timeout) raise original_exception