Merge pull request #5358 from BerriAI/litellm_fix_retry_after

Fix retry-after handling: cool down individual models based on their specific 'retry-after' header
Krish Dholakia 2024-08-27 11:50:14 -07:00 committed by GitHub
commit 415abc86c6
12 changed files with 754 additions and 202 deletions
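
For context on what the fix does: an HTTP 'Retry-After' header can carry either delta-seconds or an HTTP-date, and the cooldown should apply only to the deployment that returned it rather than to the whole group. Below is a minimal, illustrative sketch of that pattern; the helper names (parse_retry_after, cooldown_deployment, is_cooling_down) and the in-memory registry are assumptions for illustration, not litellm's implementation.

import email.utils
import time
from typing import Dict


def parse_retry_after(headers: Dict[str, str], default: float = 60.0) -> float:
    # 'Retry-After' may be delta-seconds (e.g. "120") or an HTTP-date.
    value = headers.get("retry-after") or headers.get("Retry-After")
    if value is None:
        return default
    try:
        return max(0.0, float(value))  # delta-seconds form
    except ValueError:
        pass
    try:
        http_date = email.utils.parsedate_to_datetime(value)  # HTTP-date form
        return max(0.0, http_date.timestamp() - time.time())
    except (TypeError, ValueError):
        return default


# Per-deployment cooldown registry: deployment id -> unix timestamp when usable again.
_cooldown_until: Dict[str, float] = {}


def cooldown_deployment(deployment_id: str, headers: Dict[str, str]) -> None:
    # Cool down only the deployment that was rate limited, not the whole model group.
    _cooldown_until[deployment_id] = time.time() + parse_retry_after(headers)


def is_cooling_down(deployment_id: str) -> bool:
    return _cooldown_until.get(deployment_id, 0.0) > time.time()

Parsing both header forms matters because providers differ in which one they send.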


@@ -549,3 +549,19 @@ class RouterGeneralSettings(BaseModel):
    pass_through_all_models: bool = Field(
        default=False
    )  # if passed a model not in the llm_router model list, pass through the request to litellm.acompletion/embedding


class RouterRateLimitError(ValueError):
    """Raised when no deployment in a model group is available, e.g. all are cooling down after rate-limit ('retry-after') responses."""

    def __init__(
        self,
        model: str,
        cooldown_time: float,
        enable_pre_call_checks: bool,
        cooldown_list: List,
    ):
        self.model = model
        self.cooldown_time = cooldown_time
        self.enable_pre_call_checks = enable_pre_call_checks
        self.cooldown_list = cooldown_list
        _message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}"
        super().__init__(_message)
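
A usage sketch for the new error class, assuming RouterRateLimitError is importable from the file shown above (the file path is not visible in this excerpt); the model name and cooldown values below are made up.

import time

try:
    raise RouterRateLimitError(
        model="gpt-4o",
        cooldown_time=12.5,
        enable_pre_call_checks=False,
        cooldown_list=["deployment-1", "deployment-2"],
    )
except RouterRateLimitError as e:
    # The error carries the per-model cooldown, so callers can sleep and retry
    # instead of hammering deployments that are known to be cooling down.
    time.sleep(e.cooldown_time)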