Merge pull request #5358 from BerriAI/litellm_fix_retry_after

Fix retry-after handling: cool down individual models based on their specific 'retry-after' header
Krish Dholakia 2024-08-27 11:50:14 -07:00 committed by GitHub
commit 415abc86c6
12 changed files with 754 additions and 202 deletions
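
For context on what the fix does: an HTTP 'Retry-After' header can carry either delta-seconds or an HTTP-date, and the cooldown should apply only to the deployment that returned it rather than to the whole group. Below is a minimal, illustrative sketch of that pattern; the helper names (parse_retry_after, cooldown_deployment, is_cooling_down) and the in-memory registry are assumptions for illustration, not litellm's implementation.

import email.utils
import time
from typing import Dict


def parse_retry_after(headers: Dict[str, str], default: float = 60.0) -> float:
    # 'Retry-After' may be delta-seconds (e.g. "120") or an HTTP-date.
    value = headers.get("retry-after") or headers.get("Retry-After")
    if value is None:
        return default
    try:
        return max(0.0, float(value))  # delta-seconds form
    except ValueError:
        pass
    try:
        http_date = email.utils.parsedate_to_datetime(value)  # HTTP-date form
        return max(0.0, http_date.timestamp() - time.time())
    except (TypeError, ValueError):
        return default


# Per-deployment cooldown registry: deployment id -> unix timestamp when usable again.
_cooldown_until: Dict[str, float] = {}


def cooldown_deployment(deployment_id: str, headers: Dict[str, str]) -> None:
    # Cool down only the deployment that was rate limited, not the whole model group.
    _cooldown_until[deployment_id] = time.time() + parse_retry_after(headers)


def is_cooling_down(deployment_id: str) -> bool:
    return _cooldown_until.get(deployment_id, 0.0) > time.time()

Parsing both header forms matters because providers differ in which one they send.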


@@ -549,3 +549,19 @@ class RouterGeneralSettings(BaseModel):
    pass_through_all_models: bool = Field(
        default=False
    )  # if passed a model not in the llm_router model list, pass through the request to litellm.acompletion/embedding


class RouterRateLimitError(ValueError):
    """Raised when no deployment in a model group is available, e.g. all are cooling down after rate-limit ('retry-after') responses."""

    def __init__(
        self,
        model: str,
        cooldown_time: float,
        enable_pre_call_checks: bool,
        cooldown_list: List,
    ):
        self.model = model
        self.cooldown_time = cooldown_time
        self.enable_pre_call_checks = enable_pre_call_checks
        self.cooldown_list = cooldown_list
        _message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}"
        super().__init__(_message)
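
A usage sketch for the new error class, assuming RouterRateLimitError is importable from the file shown above (the file path is not visible in this excerpt); the model name and cooldown values below are made up.

import time

try:
    raise RouterRateLimitError(
        model="gpt-4o",
        cooldown_time=12.5,
        enable_pre_call_checks=False,
        cooldown_list=["deployment-1", "deployment-2"],
    )
except RouterRateLimitError as e:
    # The error carries the per-model cooldown, so callers can sleep and retry
    # instead of hammering deployments that are known to be cooling down.
    time.sleep(e.cooldown_time)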