retry logic on router

2025-04-26 11:14:04 +00:00 · 2024-05-11 17:04:19 -07:00 · 2024-05-11 17:04:19 -07:00 · 18c2da213a
commit 18c2da213a
parent f0c727a597
1 changed files with 47 additions and 10 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@ -1507,22 +1507,30 @@ class Router:
            return response
        except Exception as e:
            original_exception = e
-            ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
+
-            if (
+            """
-                isinstance(original_exception, litellm.ContextWindowExceededError)
+            Retry Logic
-                and context_window_fallbacks is not None
+             
-            ) or (
+            """
-                isinstance(original_exception, openai.RateLimitError)
+            # raises an exception if this error should not be retried
-                and fallbacks is not None
+            _, _healthy_deployments = self._common_checks_available_deployment(
-            ):
+                model=kwargs.get("model"),
-                raise original_exception
+            )
-            ### RETRY
+
            self.should_retry_this_error(
                error=e,
                healthy_deployments=_healthy_deployments,
                fallbacks=fallbacks,
                context_window_fallbacks=context_window_fallbacks,
            )
            _timeout = self._router_should_retry(
                e=original_exception,
                remaining_retries=num_retries,
                num_retries=num_retries,
            )
            ### RETRY
            await asyncio.sleep(_timeout)
            if (
@ -1568,6 +1576,35 @@ class Router:
                pass
            raise original_exception
    def should_retry_this_error(
        self,
        error: Exception,
        healthy_deployments: Optional[List] = None,
        fallbacks: Optional[List] = None,
        context_window_fallbacks: Optional[List] = None,
    ):
        """
        Re-raise `error` when it must not be retried; otherwise return None.

        `error` is raised if either of the following holds:
        1. it is a litellm.ContextWindowExceededError and
           context_window_fallbacks is not None
        2. it is an openai.RateLimitError, fallbacks is not None, and
           healthy_deployments is not a non-empty list
        """
        # Anything that is not a list (including None) counts as zero
        # healthy deployments.
        healthy_count = (
            len(healthy_deployments) if isinstance(healthy_deployments, list) else 0
        )

        # Context window error with context-window fallbacks configured.
        hit_context_window_case = (
            isinstance(error, litellm.ContextWindowExceededError)
            and context_window_fallbacks is not None
        )
        if hit_context_window_case:
            raise error

        # Rate limit error with fallbacks configured and no healthy
        # deployments counted above.
        hit_rate_limit_case = (
            isinstance(error, openai.RateLimitError)
            and fallbacks is not None
            and healthy_count <= 0
        )
        if hit_rate_limit_case:
            raise error
    def function_with_fallbacks(self, *args, **kwargs):
        """
        Try calling the function_with_retries