fix(proxy_server.py): expose flag to disable retries when max parallel request limit is hit

2025-04-26 03:04:13 +00:00 · 2024-08-22 16:49:52 -07:00 · 2024-08-22 16:49:52 -07:00 · b0706a6f8f
commit b0706a6f8f
parent 73a5921262
2 changed files with 8 additions and 0 deletions
--- a/docs/my-website/docs/proxy/configs.md
+++ b/docs/my-website/docs/proxy/configs.md
@@ -727,6 +727,7 @@ general_settings:
    "completion_model": "string",
    "disable_spend_logs": "boolean", # turn off writing each transaction to the db
    "disable_master_key_return": "boolean", # turn off returning master key on UI (checked on '/user/info' endpoint)
+    "disable_retry_on_max_parallel_request_limit_error": "boolean", # turn off retries when max parallel request limit is reached
    "disable_reset_budget": "boolean", # turn off reset budget scheduled task
    "disable_adding_master_key_hash_to_db": "boolean", # turn off storing master key hash in db, for spend tracking
    "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2672,6 +2672,13 @@ def giveup(e):
        and isinstance(e.message, str)
        and "Max parallel request limit reached" in e.message
    )
+
+    if (
+        general_settings.get("disable_retry_on_max_parallel_request_limit_error")
+        is True
+    ):
+        return True  # give up immediately: retrying on max parallel request limit errors is disabled
+
    if result:
        verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
    return result