Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
feat add lowest cost router

parent 98778f54e7
commit e8ce014e3d

1 changed file with 19 additions and 1 deletion
@@ -21,6 +21,7 @@ from collections import defaultdict
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
 from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
+from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
 from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
 from litellm.llms.custom_httpx.azure_dall_e_2 import (
     CustomHTTPTransport,
@@ -127,7 +128,7 @@ class Router:
         retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
         allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
         cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
-        routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
+        routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
         routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.

         Returns:
@@ -347,6 +348,14 @@ class Router:
             )
             if isinstance(litellm.callbacks, list):
                 litellm.callbacks.append(self.lowestlatency_logger)  # type: ignore
+        elif routing_strategy == "cost-based-routing":
+            self.lowestcost_logger = LowestCostLoggingHandler(
+                router_cache=self.cache,
+                model_list=self.model_list,
+                routing_args={},
+            )
+            if isinstance(litellm.callbacks, list):
+                litellm.callbacks.append(self.lowestcost_logger)  # type: ignore

     def print_deployment(self, deployment: dict):
         """
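For context, a minimal usage sketch of the strategy wired up above. The deployment entries and the "sk-..." keys are hypothetical placeholders, not part of this commit; only the "cost-based-routing" string and the callback registration come from the diff.

import litellm
from litellm import Router

# Two hypothetical deployments serving the same model group "gpt-4".
model_list = [
    {
        "model_name": "gpt-4",
        "litellm_params": {"model": "azure/gpt-4-deployment", "api_key": "sk-..."},
    },
    {
        "model_name": "gpt-4",
        "litellm_params": {"model": "gpt-4", "api_key": "sk-..."},
    },
]

# Per the __init__ change above, this registers self.lowestcost_logger
# (a LowestCostLoggingHandler) as a litellm callback.
router = Router(model_list=model_list, routing_strategy="cost-based-routing")

response = router.completion(
    model="gpt-4",
    messages=[{"role": "user", "content": "hello"}],
)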
@@ -3174,6 +3183,15 @@ class Router:
                 messages=messages,
                 input=input,
             )
+        elif (
+            self.routing_strategy == "cost-based-routing"
+            and self.lowestcost_logger is not None
+        ):
+            deployment = self.lowestcost_logger.get_available_deployments(
+                model_group=model,
+                healthy_deployments=healthy_deployments,
+                request_kwargs=request_kwargs,
+            )
         if deployment is None:
             verbose_router_logger.info(
                 f"get_available_deployment for model: {model}, No deployment available"
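The LowestCostLoggingHandler itself is not included in this diff; the hunk above only shows it being queried via get_available_deployments(model_group=..., healthy_deployments=..., request_kwargs=...). As a rough illustration of a lowest-cost choice, a sketch is shown below. pick_cheapest_deployment and the litellm.model_cost pricing lookup are assumptions for illustration, not the handler's actual implementation.

from typing import Optional

import litellm

def pick_cheapest_deployment(healthy_deployments: list) -> Optional[dict]:
    # Illustrative only: pick the deployment whose underlying model has the
    # lowest combined input+output price per token in litellm's public cost map.
    cheapest, cheapest_cost = None, float("inf")
    for deployment in healthy_deployments:
        model = deployment.get("litellm_params", {}).get("model", "")
        pricing = litellm.model_cost.get(model, {})  # {} if the model is unpriced
        cost = pricing.get("input_cost_per_token", 0.0) + pricing.get(
            "output_cost_per_token", 0.0
        )
        if cost < cheapest_cost:
            cheapest, cheapest_cost = deployment, cost
    return cheapest  # None when no healthy deployments, matching the check above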