mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 19:24:27 +00:00
[Fix] Router cooldown logic - use % thresholds instead of allowed fails to cooldown deployments (#5698)
* move cooldown logic to its own helper
* add new track deployment metrics folder
* increment success, fails for deployment in current minute
* fix cooldown logic
* fix test_aaarouter_dynamic_cooldown_message_retry_time
* fix test_single_deployment_no_cooldowns_test_prod_mock_completion_calls
* clean up get from deployment test
* fix _async_get_healthy_deployments
* add mock InternalServerError
* test deployment failing 25% requests
* add test_high_traffic_cooldowns_one_bad_deployment
* fix vertex load test
* add test for rate limit error models in cool down
* change default cooldown time
* fix cooldown message time
* fix cooldown on 429 error
* fix doc string for _should_cooldown_deployment
* fix sync cooldown logic router
This commit is contained in:
parent
fc0dd3e3c2
commit
8f155327f6
11 changed files with 836 additions and 175 deletions
|
@ -83,7 +83,7 @@ class CooldownCache:
|
|||
keys = [f"deployment:{model_id}:cooldown" for model_id in model_ids]
|
||||
|
||||
# Retrieve the values for the keys using mget
|
||||
results = await self.cache.async_batch_get_cache(keys=keys)
|
||||
results = await self.cache.async_batch_get_cache(keys=keys) or []
|
||||
|
||||
active_cooldowns = []
|
||||
# Process the results
|
||||
|
@ -101,7 +101,7 @@ class CooldownCache:
|
|||
keys = [f"deployment:{model_id}:cooldown" for model_id in model_ids]
|
||||
|
||||
# Retrieve the values for the keys using mget
|
||||
results = self.cache.batch_get_cache(keys=keys)
|
||||
results = self.cache.batch_get_cache(keys=keys) or []
|
||||
|
||||
active_cooldowns = []
|
||||
# Process the results
|
||||
|
@ -119,17 +119,19 @@ class CooldownCache:
|
|||
keys = [f"deployment:{model_id}:cooldown" for model_id in model_ids]
|
||||
|
||||
# Retrieve the values for the keys using mget
|
||||
results = self.cache.batch_get_cache(keys=keys)
|
||||
results = self.cache.batch_get_cache(keys=keys) or []
|
||||
|
||||
min_cooldown_time = self.default_cooldown_time
|
||||
min_cooldown_time: Optional[float] = None
|
||||
# Process the results
|
||||
for model_id, result in zip(model_ids, results):
|
||||
if result and isinstance(result, dict):
|
||||
cooldown_cache_value = CooldownCacheValue(**result) # type: ignore
|
||||
if cooldown_cache_value["cooldown_time"] < min_cooldown_time:
|
||||
if min_cooldown_time is None:
|
||||
min_cooldown_time = cooldown_cache_value["cooldown_time"]
|
||||
elif cooldown_cache_value["cooldown_time"] < min_cooldown_time:
|
||||
min_cooldown_time = cooldown_cache_value["cooldown_time"]
|
||||
|
||||
return min_cooldown_time
|
||||
return min_cooldown_time or self.default_cooldown_time
|
||||
|
||||
|
||||
# Usage example:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue