mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
fix(router.py): back-off if no models available
This commit is contained in:
parent
3ffccf8804
commit
a4308fadce
2 changed files with 13 additions and 8 deletions
|
@ -411,7 +411,10 @@ class Router:
|
||||||
raise original_exception
|
raise original_exception
|
||||||
### RETRY
|
### RETRY
|
||||||
#### check if it should retry + back-off if required
|
#### check if it should retry + back-off if required
|
||||||
if hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
|
if "No models available" in str(e):
|
||||||
|
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries)
|
||||||
|
await asyncio.sleep(timeout)
|
||||||
|
elif hasattr(original_exception, "status_code") and hasattr(original_exception, "response") and litellm._should_retry(status_code=original_exception.status_code):
|
||||||
if hasattr(original_exception.response, "headers"):
|
if hasattr(original_exception.response, "headers"):
|
||||||
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
|
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=original_exception.response.headers)
|
||||||
else:
|
else:
|
||||||
|
@ -430,13 +433,15 @@ class Router:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
|
remaining_retries = num_retries - current_attempt
|
||||||
remaining_retries = num_retries - current_attempt
|
if "No models available" in str(e):
|
||||||
|
timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries, min_timeout=1)
|
||||||
|
await asyncio.sleep(timeout)
|
||||||
|
elif hasattr(e, "status_code") and hasattr(e, "response") and litellm._should_retry(status_code=e.status_code):
|
||||||
if hasattr(e.response, "headers"):
|
if hasattr(e.response, "headers"):
|
||||||
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries, response_headers=e.response.headers)
|
timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries, response_headers=e.response.headers)
|
||||||
else:
|
else:
|
||||||
timeout = litellm._calculate_retry_after(remaining_retries=num_retries, max_retries=num_retries)
|
timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries)
|
||||||
timeout = litellm._calculate_retry_after(remaining_retries=remaining_retries, max_retries=num_retries)
|
|
||||||
await asyncio.sleep(timeout)
|
await asyncio.sleep(timeout)
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
|
|
|
@ -3507,7 +3507,7 @@ def _should_retry(status_code: int):
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _calculate_retry_after(remaining_retries: int, max_retries: int, response_headers: Optional[httpx.Headers]=None):
|
def _calculate_retry_after(remaining_retries: int, max_retries: int, response_headers: Optional[httpx.Headers]=None, min_timeout: int = 0):
|
||||||
"""
|
"""
|
||||||
Reimplementation of openai's calculate retry after, since that one can't be imported.
|
Reimplementation of openai's calculate retry after, since that one can't be imported.
|
||||||
https://github.com/openai/openai-python/blob/af67cfab4210d8e497c05390ce14f39105c77519/src/openai/_base_client.py#L631
|
https://github.com/openai/openai-python/blob/af67cfab4210d8e497c05390ce14f39105c77519/src/openai/_base_client.py#L631
|
||||||
|
@ -3549,7 +3549,7 @@ def _calculate_retry_after(remaining_retries: int, max_retries: int, response_he
|
||||||
# Apply some jitter, plus-or-minus half a second.
|
# Apply some jitter, plus-or-minus half a second.
|
||||||
jitter = 1 - 0.25 * random.random()
|
jitter = 1 - 0.25 * random.random()
|
||||||
timeout = sleep_seconds * jitter
|
timeout = sleep_seconds * jitter
|
||||||
return timeout if timeout >= 0 else 0
|
return timeout if timeout >= min_timeout else min_timeout
|
||||||
|
|
||||||
# integration helper function
|
# integration helper function
|
||||||
def modify_integration(integration_name, integration_params):
|
def modify_integration(integration_name, integration_params):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue