fix(proxy_server.py): expose flag to disable retries when max parallel request limit is hit

This commit is contained in:
Krrish Dholakia 2024-08-22 16:49:52 -07:00
parent d7b525f391
commit 735fc804ed
2 changed files with 8 additions and 0 deletions

View file

@@ -727,6 +727,7 @@ general_settings:
"completion_model": "string", "completion_model": "string",
"disable_spend_logs": "boolean", # turn off writing each transaction to the db "disable_spend_logs": "boolean", # turn off writing each transaction to the db
"disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint) "disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint)
"disable_retry_on_max_parallel_request_limit_error": "boolean", # turn off retries when max parallel request limit is reached
"disable_reset_budget": "boolean", # turn off reset budget scheduled task "disable_reset_budget": "boolean", # turn off reset budget scheduled task
"disable_adding_master_key_hash_to_db": "boolean", # turn off storing master key hash in db, for spend tracking "disable_adding_master_key_hash_to_db": "boolean", # turn off storing master key hash in db, for spend tracking
"enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims

View file

@@ -2672,6 +2672,13 @@ def giveup(e):
and isinstance(e.message, str) and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message and "Max parallel request limit reached" in e.message
) )
if (
general_settings.get("disable_retry_on_max_parallel_request_limit_error")
is True
):
return True # give up immediately: retrying on max parallel request limit errors is disabled
if result: if result:
verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)})) verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
return result return result