forked from phoenix/litellm-mirror
feat add lowest cost router
This commit is contained in:
parent
31ac43bfdc
commit
1ba4440096
1 changed file with 19 additions and 1 deletion
|
@@ -21,6 +21,7 @@ from collections import defaultdict
|
|||
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
|
||||
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
|
||||
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
|
||||
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
|
||||
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
|
||||
from litellm.llms.custom_httpx.azure_dall_e_2 import (
|
||||
CustomHTTPTransport,
|
||||
|
@@ -127,7 +128,7 @@ class Router:
|
|||
retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
|
||||
allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
|
||||
cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
|
||||
routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
|
||||
routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
|
||||
routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.
|
||||
|
||||
Returns:
|
||||
|
@@ -347,6 +348,14 @@ class Router:
|
|||
)
|
||||
if isinstance(litellm.callbacks, list):
|
||||
litellm.callbacks.append(self.lowestlatency_logger) # type: ignore
|
||||
elif routing_strategy == "cost-based-routing":
|
||||
self.lowestcost_logger = LowestCostLoggingHandler(
|
||||
router_cache=self.cache,
|
||||
model_list=self.model_list,
|
||||
routing_args={},
|
||||
)
|
||||
if isinstance(litellm.callbacks, list):
|
||||
litellm.callbacks.append(self.lowestcost_logger) # type: ignore
|
||||
|
||||
def print_deployment(self, deployment: dict):
|
||||
"""
|
||||
|
@@ -3174,6 +3183,15 @@ class Router:
|
|||
messages=messages,
|
||||
input=input,
|
||||
)
|
||||
elif (
|
||||
self.routing_strategy == "cost-based-routing"
|
||||
and self.lowestcost_logger is not None
|
||||
):
|
||||
deployment = self.lowestcost_logger.get_available_deployments(
|
||||
model_group=model,
|
||||
healthy_deployments=healthy_deployments,
|
||||
request_kwargs=request_kwargs,
|
||||
)
|
||||
if deployment is None:
|
||||
verbose_router_logger.info(
|
||||
f"get_available_deployment for model: {model}, No deployment available"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue