diff --git a/litellm/router.py b/litellm/router.py index 4353da804b..25ebf818ef 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -21,6 +21,7 @@ from collections import defaultdict from litellm.router_strategy.least_busy import LeastBusyLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler +from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 from litellm.llms.custom_httpx.azure_dall_e_2 import ( CustomHTTPTransport, @@ -127,7 +128,7 @@ class Router: retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0. allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None. cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1. - routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]): Routing strategy. Defaults to "simple-shuffle". + routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle". routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}. Returns: @@ -347,6 +348,14 @@ class Router: ) if isinstance(litellm.callbacks, list): litellm.callbacks.append(self.lowestlatency_logger) # type: ignore + elif routing_strategy == "cost-based-routing": + self.lowestcost_logger = LowestCostLoggingHandler( + router_cache=self.cache, + model_list=self.model_list, + routing_args={}, + ) + if isinstance(litellm.callbacks, list): + litellm.callbacks.append(self.lowestcost_logger) # type: ignore def print_deployment(self, deployment: dict): """ @@ -3174,6 +3183,15 @@ class Router: messages=messages, input=input, ) + elif ( + self.routing_strategy == "cost-based-routing" + and self.lowestcost_logger is not None + ): + deployment = self.lowestcost_logger.get_available_deployments( + model_group=model, + healthy_deployments=healthy_deployments, + request_kwargs=request_kwargs, + ) if deployment is None: verbose_router_logger.info( f"get_available_deployment for model: {model}, No deployment available"