mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
Merge pull request #5358 from BerriAI/litellm_fix_retry_after
Fix 'retry-after' handling: cool down individual models based on their specific 'retry-after' header
This commit is contained in:
commit
415abc86c6
12 changed files with 754 additions and 202 deletions
|
@ -549,3 +549,19 @@ class RouterGeneralSettings(BaseModel):
|
|||
# If a request names a model that is not in the llm_router model list,
# forward the call directly to litellm.acompletion / litellm.embedding
# instead of rejecting it.
pass_through_all_models: bool = Field(default=False)
|
||||
|
||||
|
||||
class RouterRateLimitError(ValueError):
    """Raised when no deployment is available for the requested model.

    Carries the cooldown details on the exception instance so upstream
    handlers can inspect them (e.g. to tell the client how long to wait
    before retrying).
    """

    def __init__(
        self,
        model: str,
        cooldown_time: float,
        enable_pre_call_checks: bool,
        cooldown_list: List,
    ):
        # Stash every input on the instance for programmatic access.
        self.model = model
        self.cooldown_time = cooldown_time
        self.enable_pre_call_checks = enable_pre_call_checks
        self.cooldown_list = cooldown_list
        # Human-readable summary used as the exception's message.
        error_message = f"{RouterErrors.no_deployments_available.value}, Try again in {cooldown_time} seconds. Passed model={model}. pre-call-checks={enable_pre_call_checks}, cooldown_list={cooldown_list}"
        super().__init__(error_message)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue