Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
feat add lowest cost router

parent 98778f54e7
commit e8ce014e3d

1 changed file with 19 additions and 1 deletion
@@ -21,6 +21,7 @@ from collections import defaultdict
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
+from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
 from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
 from litellm.llms.custom_httpx.azure_dall_e_2 import (
     CustomHTTPTransport,
@@ -127,7 +128,7 @@ class Router:
         retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
         allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
         cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
-        routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
+        routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
         routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.

         Returns:
@@ -347,6 +348,14 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
+        elif routing_strategy == "cost-based-routing":
+            self.lowestcost_logger = LowestCostLoggingHandler(
+                router_cache=self.cache,
+                model_list=self.model_list,
+                routing_args={},
+            )
+            if isinstance(litellm.callbacks, list):
+                litellm.callbacks.append(self.lowestcost_logger)  # type: ignore

     def print_deployment(self, deployment: dict):
         """
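For context, a minimal usage sketch of the strategy wired up above. The deployment entries and the "sk-..." keys are hypothetical placeholders, not part of this commit; only the "cost-based-routing" string and the callback registration come from the diff.

import litellm
from litellm import Router

# Two hypothetical deployments serving the same model group "gpt-4".
model_list = [
    {
        "model_name": "gpt-4",
        "litellm_params": {"model": "azure/gpt-4-deployment", "api_key": "sk-..."},
    },
    {
        "model_name": "gpt-4",
        "litellm_params": {"model": "gpt-4", "api_key": "sk-..."},
    },
]

# Per the __init__ change above, this registers self.lowestcost_logger
# (a LowestCostLoggingHandler) as a litellm callback.
router = Router(model_list=model_list, routing_strategy="cost-based-routing")

response = router.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "hello"}],
)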
@@ -3174,6 +3183,15 @@ class Router:
                 messages=messages,
                 input=input,
             )
+        elif (
+            self.routing_strategy == "cost-based-routing"
+            and self.lowestcost_logger is not None
+        ):
+            deployment = self.lowestcost_logger.get_available_deployments(
+                model_group=model,
+                healthy_deployments=healthy_deployments,
+                request_kwargs=request_kwargs,
+            )
         if deployment is None:
             verbose_router_logger.info(
                 f"get_available_deployment for model: {model}, No deployment available"
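The LowestCostLoggingHandler itself is not included in this diff; the hunk above only shows it being queried via get_available_deployments(model_group=..., healthy_deployments=..., request_kwargs=...). As a rough illustration of a lowest-cost choice, a sketch is shown below. pick_cheapest_deployment and the litellm.model_cost pricing lookup are assumptions for illustration, not the handler's actual implementation.

from typing import Optional

import litellm

def pick_cheapest_deployment(healthy_deployments: list) -> Optional[dict]:
    # Illustrative only: pick the deployment whose underlying model has the
    # lowest combined input+output price per token in litellm's public cost map.
    cheapest, cheapest_cost = None, float("inf")
    for deployment in healthy_deployments:
        model = deployment.get("litellm_params", {}).get("model", "")
        pricing = litellm.model_cost.get(model, {})  # {} if the model is unpriced
        cost = pricing.get("input_cost_per_token", 0.0) + pricing.get(
            "output_cost_per_token", 0.0
        )
        if cost < cheapest_cost:
            cheapest, cheapest_cost = deployment, cost
    return cheapest  # None when no healthy deployments, matching the check above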