(router_strategy/) ensure all async functions use async cache methods (#6489)

* fix router strategy

* use async set / get cache in router_strategy

* add coverage for router strategy

* fix imports

* fix batch_get_cache

* use async methods for least busy

* fix least busy use async methods

* fix test_dual_cache_increment

* test async_get_available_deployment when routing_strategy="least-busy"
This commit is contained in:
Ishaan Jaff 2024-10-29 21:07:17 +05:30 committed by GitHub
parent f9ba74ef87
commit 441adad3ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 202 additions and 20 deletions

View file

@ -5127,6 +5127,7 @@ class Router:
and self.routing_strategy != "simple-shuffle"
and self.routing_strategy != "cost-based-routing"
and self.routing_strategy != "latency-based-routing"
and self.routing_strategy != "least-busy"
): # prevent regressions for other routing strategies, that don't have async get available deployments implemented.
return self.get_available_deployment(
model=model,
@ -5240,6 +5241,16 @@ class Router:
healthy_deployments=healthy_deployments,
model=model,
)
elif (
self.routing_strategy == "least-busy"
and self.leastbusy_logger is not None
):
deployment = (
await self.leastbusy_logger.async_get_available_deployments(
model_group=model,
healthy_deployments=healthy_deployments, # type: ignore
)
)
else:
deployment = None
if deployment is None: