fix(lowest_latency.py): add back tpm/rpm checks, configurable time window

Commit: bb04a340a5
Parent: 5e811cfe80
Author: Krrish Dholakia
Date: 2024-01-10 20:52:01 +05:30
3 changed files with 312 additions and 51 deletions

View file

@ -105,6 +105,7 @@ class Router:
"usage-based-routing",
"latency-based-routing",
] = "simple-shuffle",
routing_strategy_args: dict = {}, # just for latency-based routing
) -> None:
self.set_verbose = set_verbose
self.deployment_names: List = (
@ -217,7 +218,9 @@ class Router:
litellm.callbacks.append(self.lowesttpm_logger) # type: ignore
elif routing_strategy == "latency-based-routing":
self.lowestlatency_logger = LowestLatencyLoggingHandler(
router_cache=self.cache, model_list=self.model_list
router_cache=self.cache,
model_list=self.model_list,
routing_args=routing_strategy_args,
)
if isinstance(litellm.callbacks, list):
litellm.callbacks.append(self.lowestlatency_logger) # type: ignore
@ -1427,9 +1430,8 @@ class Router:
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(),
limits=httpx.Limits(
max_connections=1000,
max_keepalive_connections=100
)
max_connections=1000, max_keepalive_connections=100
),
), # type: ignore
)
self.cache.set_cache(
@ -1449,9 +1451,8 @@ class Router:
http_client=httpx.Client(
transport=CustomHTTPTransport(),
limits=httpx.Limits(
max_connections=1000,
max_keepalive_connections=100
)
max_connections=1000, max_keepalive_connections=100
),
), # type: ignore
)
self.cache.set_cache(
@ -1471,10 +1472,9 @@ class Router:
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000,
max_keepalive_connections=100
max_connections=1000, max_keepalive_connections=100
)
)
),
)
self.cache.set_cache(
key=cache_key,
@ -1492,10 +1492,9 @@ class Router:
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000,
max_keepalive_connections=100
max_connections=1000, max_keepalive_connections=100
)
)
),
)
self.cache.set_cache(
key=cache_key,