mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
fix(router.py): async simple-shuffle support
This commit is contained in:
parent
a909af3fc0
commit
b96741e4f4
2 changed files with 21 additions and 1 deletion
|
@ -2802,6 +2802,7 @@ class Router:
|
|||
"""
|
||||
if (
|
||||
self.routing_strategy != "usage-based-routing-v2"
|
||||
and self.routing_strategy != "simple-shuffle"
|
||||
): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
|
||||
return self.get_available_deployment(
|
||||
model=model,
|
||||
|
@ -2852,6 +2853,25 @@ class Router:
|
|||
messages=messages,
|
||||
input=input,
|
||||
)
|
||||
elif self.routing_strategy == "simple-shuffle":
|
||||
# if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm
|
||||
############## Check if we can do a RPM/TPM based weighted pick #################
|
||||
rpm = healthy_deployments[0].get("litellm_params").get("rpm", None)
|
||||
if rpm is not None:
|
||||
# use weight-random pick if rpms provided
|
||||
rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments]
|
||||
verbose_router_logger.debug(f"\nrpms {rpms}")
|
||||
total_rpm = sum(rpms)
|
||||
weights = [rpm / total_rpm for rpm in rpms]
|
||||
verbose_router_logger.debug(f"\n weights {weights}")
|
||||
# Perform weighted random pick
|
||||
selected_index = random.choices(range(len(rpms)), weights=weights)[0]
|
||||
verbose_router_logger.debug(f"\n selected index, {selected_index}")
|
||||
deployment = healthy_deployments[selected_index]
|
||||
verbose_router_logger.info(
|
||||
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}"
|
||||
)
|
||||
return deployment or deployment[0]
|
||||
|
||||
if deployment is None:
|
||||
verbose_router_logger.info(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue