diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index a8101181c..53c59ff8a 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -38,7 +38,7 @@ model_list: # max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET router_settings: - routing_strategy: usage-based-routing-v2 + # routing_strategy: usage-based-routing-v2 # redis_url: "os.environ/REDIS_URL" redis_host: os.environ/REDIS_HOST redis_port: os.environ/REDIS_PORT diff --git a/litellm/router.py b/litellm/router.py index 9dcff6f3d..fda53eb4f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2802,6 +2802,7 @@ class Router: """ if ( self.routing_strategy != "usage-based-routing-v2" + and self.routing_strategy != "simple-shuffle" ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented. return self.get_available_deployment( model=model, @@ -2852,6 +2853,25 @@ class Router: messages=messages, input=input, ) + elif self.routing_strategy == "simple-shuffle": + # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm + ############## Check if we can do a RPM/TPM based weighted pick ################# + rpm = healthy_deployments[0].get("litellm_params").get("rpm", None) + if rpm is not None: + # use weight-random pick if rpms provided + rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments] + verbose_router_logger.debug(f"\nrpms {rpms}") + total_rpm = sum(rpms) + weights = [rpm / total_rpm for rpm in rpms] + verbose_router_logger.debug(f"\n weights {weights}") + # Perform weighted random pick + selected_index = random.choices(range(len(rpms)), weights=weights)[0] + verbose_router_logger.debug(f"\n selected index, {selected_index}") + deployment = healthy_deployments[selected_index] + verbose_router_logger.info( + f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}" + ) + return deployment or deployment[0] if deployment is None: verbose_router_logger.info(