forked from phoenix/litellm-mirror
fix(router.py): async simple-shuffle support
This commit is contained in:
parent
a909af3fc0
commit
b96741e4f4
2 changed files with 21 additions and 1 deletion
|
@ -38,7 +38,7 @@ model_list:
|
||||||
# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
|
# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
|
||||||
|
|
||||||
router_settings:
|
router_settings:
|
||||||
routing_strategy: usage-based-routing-v2
|
# routing_strategy: usage-based-routing-v2
|
||||||
# redis_url: "os.environ/REDIS_URL"
|
# redis_url: "os.environ/REDIS_URL"
|
||||||
redis_host: os.environ/REDIS_HOST
|
redis_host: os.environ/REDIS_HOST
|
||||||
redis_port: os.environ/REDIS_PORT
|
redis_port: os.environ/REDIS_PORT
|
||||||
|
|
|
@ -2802,6 +2802,7 @@ class Router:
|
||||||
"""
|
"""
|
||||||
if (
|
if (
|
||||||
self.routing_strategy != "usage-based-routing-v2"
|
self.routing_strategy != "usage-based-routing-v2"
|
||||||
|
and self.routing_strategy != "simple-shuffle"
|
||||||
): # prevent regressions for other routing strategies that don't have an async version of get_available_deployment implemented.
|
): # prevent regressions for other routing strategies that don't have an async version of get_available_deployment implemented.
|
||||||
return self.get_available_deployment(
|
return self.get_available_deployment(
|
||||||
model=model,
|
model=model,
|
||||||
|
@ -2852,6 +2853,25 @@ class Router:
|
||||||
messages=messages,
|
messages=messages,
|
||||||
input=input,
|
input=input,
|
||||||
)
|
)
|
||||||
|
elif self.routing_strategy == "simple-shuffle":
|
||||||
|
# if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
|
||||||
|
############## Check if we can do a RPM/TPM based weighted pick #################
|
||||||
|
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
|
||||||
|
if rpm is not None:
|
||||||
|
# use a weighted random pick if rpms are provided
|
||||||
|
rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
|
||||||
|
verbose_router_logger.debug(f"\nrpms {rpms}")
|
||||||
|
total_rpm = sum(rpms)
|
||||||
|
weights = [rpm / total_rpm for rpm in rpms]
|
||||||
|
verbose_router_logger.debug(f"\n weights {weights}")
|
||||||
|
# Perform weighted random pick
|
||||||
|
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
|
||||||
|
verbose_router_logger.debug(f"\n selected index, {selected_index}")
|
||||||
|
deployment = healthy_deployments[selected_index]
|
||||||
|
verbose_router_logger.info(
|
||||||
|
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
|
||||||
|
)
|
||||||
|
return deployment or deployment[0]
|
||||||
|
|
||||||
if deployment is None:
|
if deployment is None:
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue