unify sync and async logic for retries

2025-04-26 11:14:04 +00:00 · 2024-05-11 18:17:04 -07:00 · 2024-05-11 18:17:04 -07:00 · a3b4074c22
commit a3b4074c22
parent 06e0c4c171
1 changed files with 25 additions and 12 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -1766,23 +1766,31 @@ class Router:
        except Exception as e:
            original_exception = e
            ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR
-            if (
-                isinstance(original_exception, litellm.ContextWindowExceededError)
-                and context_window_fallbacks is not None
-            ) or (
-                isinstance(original_exception, openai.RateLimitError)
-                and fallbacks is not None
-            ):
-                raise original_exception
-            ## LOGGING
-            if num_retries > 0:
-                kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
-            ### RETRY
+            _, _healthy_deployments = self._common_checks_available_deployment(
+                model=kwargs.get("model"),
+            )
+
+            # raises an exception if this error should not be retries
+            self.should_retry_this_error(
+                error=e,
+                healthy_deployments=_healthy_deployments,
+                fallbacks=fallbacks,
+                context_window_fallbacks=context_window_fallbacks,
+            )
+
+            # decides how long to sleep before retry
            _timeout = self._time_to_sleep_before_retry(
                e=original_exception,
                remaining_retries=num_retries,
                num_retries=num_retries,
+                _healthy_deployments=_healthy_deployments,
+                fallbacks=fallbacks,
            )
+
+            ## LOGGING
+            if num_retries > 0:
+                kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
+
            time.sleep(_timeout)
            for current_attempt in range(num_retries):
                verbose_router_logger.debug(
@ -1796,11 +1804,16 @@ class Router:
                except Exception as e:
                    ## LOGGING
                    kwargs = self.log_retry(kwargs=kwargs, e=e)
+                    _, _healthy_deployments = self._common_checks_available_deployment(
+                        model=kwargs.get("model"),
+                    )
                    remaining_retries = num_retries - current_attempt
                    _timeout = self._time_to_sleep_before_retry(
                        e=e,
                        remaining_retries=remaining_retries,
                        num_retries=num_retries,
+                        healthy_deployments=_healthy_deployments,
+                        fallbacks=fallbacks,
                    )
                    time.sleep(_timeout)
            raise original_exception