feat add lowest cost router

This commit is contained in:
Ishaan Jaff 2024-05-07 12:12:39 -07:00
parent 31ac43bfdc
commit 1ba4440096

View file

@ -21,6 +21,7 @@ from collections import defaultdict
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
from litellm.llms.custom_httpx.azure_dall_e_2 import (
CustomHTTPTransport,
@ -127,7 +128,7 @@ class Router:
retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.
Returns:
@ -347,6 +348,14 @@ class Router:
)
if isinstance(litellm.callbacks, list):
litellm.callbacks.append(self.lowestlatency_logger) # type: ignore
elif routing_strategy == "cost-based-routing":
self.lowestcost_logger = LowestCostLoggingHandler(
router_cache=self.cache,
model_list=self.model_list,
routing_args={},
)
if isinstance(litellm.callbacks, list):
litellm.callbacks.append(self.lowestcost_logger) # type: ignore
def print_deployment(self, deployment: dict):
"""
@ -3174,6 +3183,15 @@ class Router:
messages=messages,
input=input,
)
elif (
self.routing_strategy == "cost-based-routing"
and self.lowestcost_logger is not None
):
deployment = self.lowestcost_logger.get_available_deployments(
model_group=model,
healthy_deployments=healthy_deployments,
request_kwargs=request_kwargs,
)
if deployment is None:
verbose_router_logger.info(
f"get_available_deployment for model: {model}, No deployment available"