mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
[Fix] Router cooldown logic - use % thresholds instead of allowed fails to cooldown deployments (#5698)
* move cooldown logic to it's own helper * add new track deployment metrics folder * increment success, fails for deployment in current minute * fix cooldown logic * fix test_aaarouter_dynamic_cooldown_message_retry_time * fix test_single_deployment_no_cooldowns_test_prod_mock_completion_calls * clean up get from deployment test * fix _async_get_healthy_deployments * add mock InternalServerError * test deployment failing 25% requests * add test_high_traffic_cooldowns_one_bad_deployment * fix vertex load test * add test for rate limit error models in cool down * change default cooldown time * fix cooldown message time * fix cooldown on 429 error * fix doc string for _should_cooldown_deployment * fix sync cooldown logic router
This commit is contained in:
parent
7c2ddba6c6
commit
c8d15544c8
11 changed files with 836 additions and 175 deletions
|
@ -0,0 +1,91 @@
|
|||
"""
|
||||
Helper functions to get/set num success and num failures per deployment
|
||||
|
||||
|
||||
increment_deployment_failures_for_current_minute
|
||||
increment_deployment_successes_for_current_minute
|
||||
|
||||
get_deployment_failures_for_current_minute
|
||||
get_deployment_successes_for_current_minute
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Callable, Optional
|
||||
|
||||
from litellm.utils import get_utc_datetime
|
||||
|
||||
# `Router` is imported only while type checking; at runtime the alias is
# `Any`, so this module never imports `litellm.router` when executed.
# NOTE(review): presumably this avoids a circular import with litellm.router
# -- confirm.
if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any
|
||||
|
||||
|
||||
def increment_deployment_successes_for_current_minute(
    litellm_router_instance: LitellmRouter,
    deployment_id: str,
):
    """
    In-Memory: Increments the number of successes for the current minute for a deployment_id

    Args:
        litellm_router_instance: Router whose ``cache`` attribute holds the counter.
        deployment_id: Deployment whose success counter is incremented.

    The counter lives under the key ``"<deployment_id>:successes"``. The cache
    is called with ``local_only=True``, ``value=1`` and ``ttl=60``; how the TTL
    is applied on repeated increments is up to the cache implementation.
    """
    key = f"{deployment_id}:successes"
    litellm_router_instance.cache.increment_cache(
        local_only=True,
        key=key,
        value=1,
        ttl=60,
    )
|
||||
|
||||
|
||||
def increment_deployment_failures_for_current_minute(
    litellm_router_instance: LitellmRouter,
    deployment_id: str,
):
    """
    In-Memory: Increments the number of failures for the current minute for a deployment_id

    Args:
        litellm_router_instance: Router whose ``cache`` attribute holds the counter.
        deployment_id: Deployment whose failure counter is incremented.

    The counter lives under the key ``"<deployment_id>:fails"``. The cache is
    called with ``local_only=True``, ``value=1`` and ``ttl=60``; how the TTL is
    applied on repeated increments is up to the cache implementation.
    """
    key = f"{deployment_id}:fails"
    litellm_router_instance.cache.increment_cache(
        local_only=True,
        key=key,
        value=1,
        ttl=60,
    )
|
||||
|
||||
|
||||
def get_deployment_successes_for_current_minute(
    litellm_router_instance: LitellmRouter,
    deployment_id: str,
) -> int:
    """
    Returns the number of successes for the current minute for a deployment_id

    Returns 0 if no value found

    Args:
        litellm_router_instance: Router whose ``cache`` attribute holds the counter.
        deployment_id: Deployment whose success counter is read.

    Reads the key ``"<deployment_id>:successes"`` with ``local_only=True``.
    """
    key = f"{deployment_id}:successes"
    # `or 0` maps any falsy cache result (e.g. None for a missing key) to 0.
    return (
        litellm_router_instance.cache.get_cache(
            local_only=True,
            key=key,
        )
        or 0
    )
|
||||
|
||||
|
||||
def get_deployment_failures_for_current_minute(
    litellm_router_instance: LitellmRouter,
    deployment_id: str,
) -> int:
    """
    Returns the number of fails for the current minute for a deployment_id

    Returns 0 if no value found

    Args:
        litellm_router_instance: Router whose ``cache`` attribute holds the counter.
        deployment_id: Deployment whose failure counter is read.

    Reads the key ``"<deployment_id>:fails"`` with ``local_only=True``.
    """
    key = f"{deployment_id}:fails"
    # `or 0` maps any falsy cache result (e.g. None for a missing key) to 0.
    return (
        litellm_router_instance.cache.get_cache(
            local_only=True,
            key=key,
        )
        or 0
    )
|
Loading…
Add table
Add a link
Reference in a new issue