mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
retry logic on router
This commit is contained in:
parent
f0c727a597
commit
18c2da213a
1 changed files with 47 additions and 10 deletions
|
@ -1507,22 +1507,30 @@ class Router:
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
original_exception = e
|
original_exception = e
|
||||||
### CHECK IF RATE LIMIT / CONTEXT WINDOW ERROR w/ fallbacks available / Bad Request Error
|
|
||||||
if (
|
"""
|
||||||
isinstance(original_exception, litellm.ContextWindowExceededError)
|
Retry Logic
|
||||||
and context_window_fallbacks is not None
|
|
||||||
) or (
|
"""
|
||||||
isinstance(original_exception, openai.RateLimitError)
|
# raises an exception if this error should not be retried
|
||||||
and fallbacks is not None
|
_, _healthy_deployments = self._common_checks_available_deployment(
|
||||||
):
|
model=kwargs.get("model"),
|
||||||
raise original_exception
|
)
|
||||||
### RETRY
|
|
||||||
|
self.should_retry_this_error(
|
||||||
|
error=e,
|
||||||
|
healthy_deployments=_healthy_deployments,
|
||||||
|
fallbacks=fallbacks,
|
||||||
|
context_window_fallbacks=context_window_fallbacks,
|
||||||
|
)
|
||||||
|
|
||||||
_timeout = self._router_should_retry(
|
_timeout = self._router_should_retry(
|
||||||
e=original_exception,
|
e=original_exception,
|
||||||
remaining_retries=num_retries,
|
remaining_retries=num_retries,
|
||||||
num_retries=num_retries,
|
num_retries=num_retries,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
### RETRY
|
||||||
await asyncio.sleep(_timeout)
|
await asyncio.sleep(_timeout)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
@ -1568,6 +1576,35 @@ class Router:
|
||||||
pass
|
pass
|
||||||
raise original_exception
|
raise original_exception
|
||||||
|
|
||||||
|
def should_retry_this_error(
    self,
    error: Exception,
    healthy_deployments: Optional[List] = None,
    fallbacks: Optional[List] = None,
    context_window_fallbacks: Optional[List] = None,
):
    """
    Re-raise `error` when it should not be retried; otherwise return None.

    The error is re-raised in either of these cases:

    1. It is a `litellm.ContextWindowExceededError` and
       `context_window_fallbacks` is not None.
    2. It is an `openai.RateLimitError`, `fallbacks` is not None, and
       `healthy_deployments` is not a non-empty list (i.e. it is None,
       not a list, or an empty list).

    Args:
        error: the exception raised by the failed call.
        healthy_deployments: deployments counted as still available; only
            a `list` contributes to the count.
        fallbacks: configured fallbacks; only checked against None.
        context_window_fallbacks: configured context-window fallbacks;
            only checked against None.

    Raises:
        The `error` object itself, unchanged, in the two cases above.
    """
    # Anything that is not a list (including None) counts as zero deployments.
    healthy_count = (
        len(healthy_deployments) if isinstance(healthy_deployments, list) else 0
    )

    # Case 1: context window exceeded and context-window fallbacks are set.
    context_window_exceeded = isinstance(error, litellm.ContextWindowExceededError)
    if context_window_exceeded and context_window_fallbacks is not None:
        raise error

    # Case 2: rate limited, fallbacks are set, and no healthy deployment is left.
    rate_limited = isinstance(error, openai.RateLimitError)
    if rate_limited and fallbacks is not None and healthy_count == 0:
        raise error
|
||||||
|
|
||||||
def function_with_fallbacks(self, *args, **kwargs):
|
def function_with_fallbacks(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Try calling the function_with_retries
|
Try calling the function_with_retries
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue