diff --git a/litellm/router.py b/litellm/router.py
index 24f356e085..04d6ead90f 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -411,7 +411,10 @@ class Router:
                 raise original_exception
             ### RETRY #### check if it should retry + back-off if required
-            if hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
+            if "No models available" in str(e):
+                timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries)
+                await asyncio.sleep(timeout)
+            elif hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
                 if hasattr(original_exception.response, "headers"):
                     timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
                 else:
                     timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries)
@@ -430,13 +433,15 @@ class Router:
                     return response
                 except Exception as e:
-                    if hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
-                        remaining_retries = num_retries - current_attempt
+                    remaining_retries = num_retries - current_attempt
+                    if "No models available" in str(e):
+                        timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries, min_timeout=1)
+                        await asyncio.sleep(timeout)
+                    elif hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
                         if hasattr(e.response, "headers"):
-                            timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=e.response.headers)
+                            timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries, response_headers=e.response.headers)
                         else:
-                            timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries)
-                        timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries)
+                            timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries)
                         await asyncio.sleep(timeout)
                     else:
                         raise e
 
diff --git a/litellm/utils.py b/litellm/utils.py
index c4d9e7670e..a3908e71c3 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3507,7 +3507,7 @@ def _should_retry(status_code: int):
     return False
 
 
-def _calculate_retry_after(remaining_retries: int, max_retries: int, response_headers: Optional[httpx.Headers]=None):
+def _calculate_retry_after(remaining_retries: int, max_retries: int, response_headers: Optional[httpx.Headers]=None, min_timeout: int = 0):
     """
     Reimplementation of openai's calculate retry after, since that one can't be imported.
     https://github.com/openai/openai-python/blob/af67cfab4210d8e497c05390ce14f39105c77519/src/openai/_base_client.py#L631
@@ -3549,7 +3549,7 @@ def _calculate_retry_after(remaining_retries: int, max_retries: int, response_he
     # Apply some jitter, plus-or-minus half a second.
     jitter = 1 - 0.25 * random.random()
     timeout = sleep_seconds * jitter
-    return timeout if timeout >= 0 else 0
+    return timeout if timeout >= min_timeout else min_timeout
 
 # integration helper function
 def modify_integration(integration_name, integration_params):