From 18c2da213a75c03ac79d99ed2bc45696e6c3e814 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 11 May 2024 17:04:19 -0700
Subject: [PATCH] retry logic on router

---
 litellm/router.py | 57 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 579d3bde3..f651b73d4 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1507,22 +1507,30 @@ class Router:
             return response
         except Exception as e:
             original_exception = e
-            ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
-            if (
-                isinstance(original_exception, litellm.ContextWindowExceededError)
-                and context_window_fallbacks is not None
-            ) or (
-                isinstance(original_exception, openai.RateLimitError)
-                and fallbacks is not None
-            ):
-                raise original_exception
-            ### RETRY
+
+            """
+            Retry Logic
+
+            """
+            # raises an exception if this error should not be retried
+            _, _healthy_deployments = self._common_checks_available_deployment(
+                model=kwargs.get("model"),
+            )
+
+            self.should_retry_this_error(
+                error=e,
+                healthy_deployments=_healthy_deployments,
+                fallbacks=fallbacks,
+                context_window_fallbacks=context_window_fallbacks,
+            )
             _timeout = self._router_should_retry(
                 e=original_exception,
                 remaining_retries=num_retries,
                 num_retries=num_retries,
             )
+
+            ### RETRY
             await asyncio.sleep(_timeout)
 
             if (
@@ -1568,6 +1576,35 @@ class Router:
                     pass
             raise original_exception
 
+    def should_retry_this_error(
+        self,
+        error: Exception,
+        healthy_deployments: Optional[List] = None,
+        fallbacks: Optional[List] = None,
+        context_window_fallbacks: Optional[List] = None,
+    ):
+        """
+        1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
+
+        2. raise an exception for RateLimitError if
+        - fallbacks are available
+        - there are no healthy deployments in the same model group
+        """
+
+        _num_healthy_deployments = 0
+        if healthy_deployments is not None and isinstance(healthy_deployments, list):
+            _num_healthy_deployments = len(healthy_deployments)
+        ### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
+        if (
+            isinstance(error, litellm.ContextWindowExceededError)
+            and context_window_fallbacks is not None
+        ) or (
+            isinstance(error, openai.RateLimitError)
+            and fallbacks is not None
+            and _num_healthy_deployments <= 0
+        ):
+            raise error
+
     def function_with_fallbacks(self, *args, **kwargs):
         """
         Try calling the function_with_retries
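
For reference, a minimal standalone sketch of the retry decision the patch introduces. It is not Router code: the exception classes below are hypothetical stand-ins for litellm.ContextWindowExceededError and openai.RateLimitError so the snippet runs without either dependency, and the argument values in the demo are illustrative only.

# retry_decision_sketch.py - mirrors the patch's should_retry_this_error logic with stand-ins
from typing import List, Optional


class ContextWindowExceededError(Exception):
    """Stand-in for litellm.ContextWindowExceededError."""


class RateLimitError(Exception):
    """Stand-in for openai.RateLimitError."""


def should_retry_this_error(
    error: Exception,
    healthy_deployments: Optional[List] = None,
    fallbacks: Optional[List] = None,
    context_window_fallbacks: Optional[List] = None,
) -> None:
    """Raise `error` when falling back is preferable to retrying:

    - ContextWindowExceededError with context-window fallbacks configured -> raise
    - RateLimitError with fallbacks configured and no healthy deployments left -> raise
    Otherwise return, and the caller proceeds with its retry loop.
    """
    num_healthy = len(healthy_deployments) if isinstance(healthy_deployments, list) else 0

    if (
        isinstance(error, ContextWindowExceededError)
        and context_window_fallbacks is not None
    ) or (
        isinstance(error, RateLimitError)
        and fallbacks is not None
        and num_healthy <= 0
    ):
        raise error


if __name__ == "__main__":
    # Retried: rate limit hit, but another healthy deployment can absorb the retry.
    should_retry_this_error(
        error=RateLimitError("429"),
        healthy_deployments=["deployment-2"],
        fallbacks=[{"gpt-4": ["gpt-3.5-turbo"]}],
    )
    print("rate limit with healthy deployments -> retry")

    # Not retried: no healthy deployments, so the error surfaces and the fallback path runs.
    try:
        should_retry_this_error(
            error=RateLimitError("429"),
            healthy_deployments=[],
            fallbacks=[{"gpt-4": ["gpt-3.5-turbo"]}],
        )
    except RateLimitError:
        print("rate limit with no healthy deployments -> raise so fallbacks run")

The key behavior under this logic: a rate-limit error is only surfaced immediately (letting the fallback path run) when no other healthy deployment in the model group could absorb a retry.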