fix(router.py): fix sync should_retry logic

This commit is contained in:
Krrish Dholakia 2024-04-27 14:48:07 -07:00
parent f31d3c4e9f
commit 5e0bd5982e
2 changed files with 103 additions and 53 deletions

View file

@ -1453,6 +1453,7 @@ class Router:
await asyncio.sleep(timeout)
elif RouterErrors.user_defined_ratelimit_error.value in str(e):
raise e # don't wait to retry if deployment hits user-defined rate-limit
elif hasattr(original_exception, "status_code") and litellm._should_retry(
status_code=original_exception.status_code
):
@ -1614,6 +1615,38 @@ class Router:
raise e
raise original_exception
def _router_should_retry(
self, e: Exception, remaining_retries: int, num_retries: int
):
if "No models available" in str(e):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
elif (
hasattr(e, "status_code")
and hasattr(e, "response")
and litellm._should_retry(status_code=e.status_code)
):
if hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
else:
raise e
def function_with_retries(self, *args, **kwargs):
"""
Try calling the model 3 times. Shuffle between available deployments.
@ -1649,6 +1682,11 @@ class Router:
if num_retries > 0:
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
### RETRY
self._router_should_retry(
e=original_exception,
remaining_retries=num_retries,
num_retries=num_retries,
)
for current_attempt in range(num_retries):
verbose_router_logger.debug(
f"retrying request. Current attempt - {current_attempt}; retries left: {num_retries}"
@ -1662,34 +1700,11 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
if "No models available" in str(e):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
elif (
hasattr(e, "status_code")
and hasattr(e, "response")
and litellm._should_retry(status_code=e.status_code)
):
if hasattr(e.response, "headers"):
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
response_headers=e.response.headers,
min_timeout=self.retry_after,
)
else:
timeout = litellm._calculate_retry_after(
remaining_retries=remaining_retries,
max_retries=num_retries,
min_timeout=self.retry_after,
)
time.sleep(timeout)
else:
raise e
self._router_should_retry(
e=e,
remaining_retries=remaining_retries,
num_retries=num_retries,
)
raise original_exception
### HELPER FUNCTIONS