forked from phoenix/litellm-mirror
feat(proxy_server.py): retry if virtual key is rate limited
Currently this only applies to the chat completions endpoint.
This commit is contained in:
parent
f95458dad8
commit
ad55f4dbb5
4 changed files with 57 additions and 1 deletions
|
@ -8,6 +8,7 @@ import hashlib, uuid
|
|||
import warnings
|
||||
import importlib
|
||||
import warnings
|
||||
import backoff
|
||||
|
||||
|
||||
def showwarning(message, category, filename, lineno, file=None, line=None):
|
||||
|
@ -2298,6 +2299,11 @@ def parse_cache_control(cache_control):
|
|||
return cache_dict
|
||||
|
||||
|
||||
def on_backoff(details):
    """Emit a debug log line whenever a retry is about to be scheduled.

    Intended as the ``on_backoff`` handler for ``backoff.on_exception``.
    ``details`` is the dictionary supplied to the handler; only its
    ``'tries'`` entry (the number of tries completed so far) is read.
    """
    attempt_number = details["tries"]
    verbose_proxy_logger.debug(
        f"Backing off... this was attempt #{attempt_number}"
    )
|
||||
|
||||
|
||||
@router.on_event("startup")
|
||||
async def startup_event():
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name
|
||||
|
@ -2613,6 +2619,19 @@ async def completion(
|
|||
dependencies=[Depends(user_api_key_auth)],
|
||||
tags=["chat/completions"],
|
||||
) # azure compatible endpoint
|
||||
@backoff.on_exception(
|
||||
backoff.expo,
|
||||
Exception, # base exception to catch for the backoff
|
||||
max_tries=litellm.num_retries or 3, # maximum number of retries
|
||||
max_time=litellm.request_timeout or 60, # maximum total time to retry for
|
||||
on_backoff=on_backoff, # specifying the function to call on backoff
|
||||
giveup=lambda e: not (
|
||||
isinstance(e, ProxyException)
|
||||
and getattr(e, "message", None) is not None
|
||||
and isinstance(e.message, str)
|
||||
and "Max parallel request limit reached" in e.message
|
||||
), # the result of the logical expression is on the second position
|
||||
)
|
||||
async def chat_completion(
|
||||
request: Request,
|
||||
fastapi_response: Response,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue