mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
Improve Proxy Resiliency: Cooldown single-deployment model groups if 100% calls failed in high traffic (#7823)
* refactor(_is_cooldown_required): move '_is_cooldown_required' into cooldown_handlers.py * refactor(cooldown_handlers.py): move cooldown constants into `.constants.py` * fix(cooldown_handlers.py): remove if single deployment don't cooldown logic move to traffic based cooldown logic Addresses https://github.com/BerriAI/litellm/issues/7822 * fix: add unit tests for '_should_cooldown_deployment' * test: ensure all tests pass * test: update test * fix(cooldown_handlers.py): don't cooldown single deployment models for anything besides traffic related errors * fix(cooldown_handlers.py): fix cooldown handler logic * fix(cooldown_handlers.py): fix check
This commit is contained in:
parent
d00febcdaa
commit
80f7af510b
5 changed files with 220 additions and 73 deletions
|
@ -2,11 +2,16 @@ ROUTER_MAX_FALLBACKS = 5
|
|||
DEFAULT_BATCH_SIZE = 512
|
||||
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
|
||||
DEFAULT_MAX_RETRIES = 2
|
||||
DEFAULT_FAILURE_THRESHOLD_PERCENT = (
|
||||
0.5 # default cooldown a deployment if 50% of requests fail in a given minute
|
||||
)
|
||||
DEFAULT_COOLDOWN_TIME_SECONDS = 5
|
||||
DEFAULT_REPLICATE_POLLING_RETRIES = 5
|
||||
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
|
||||
DEFAULT_IMAGE_TOKEN_COUNT = 250
|
||||
DEFAULT_IMAGE_WIDTH = 300
|
||||
DEFAULT_IMAGE_HEIGHT = 300
|
||||
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
|
||||
LITELLM_CHAT_PROVIDERS = [
|
||||
"openai",
|
||||
"openai_like",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue