(router_strategy/) ensure all async functions use async cache methods (#6489)

* fix router strategy

* use async set / get cache in router_strategy

* add coverage for router strategy

* fix imports

* fix batch_get_cache

* use async methods for least busy

* fix least-busy strategy to use async methods

* fix test_dual_cache_increment

* test async_get_available_deployment when routing_strategy="least-busy"
This commit is contained in:
Ishaan Jaff 2024-10-29 21:07:17 +05:30 committed by GitHub
parent f9ba74ef87
commit 441adad3ae
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 202 additions and 20 deletions

View file

@ -65,7 +65,9 @@ def test_get_available_deployments():
# test_get_available_deployments()
def test_router_get_available_deployments():
@pytest.mark.parametrize("async_test", [True, False])
@pytest.mark.asyncio
async def test_router_get_available_deployments(async_test):
"""
Tests if 'get_available_deployments' returns the least busy deployment
"""
@ -114,9 +116,14 @@ def test_router_get_available_deployments():
deployment = "azure/chatgpt-v-2"
request_count_dict = {1: 10, 2: 54, 3: 100}
cache_key = f"{model_group}_request_count"
router.cache.set_cache(key=cache_key, value=request_count_dict)
deployment = router.get_available_deployment(model=model_group, messages=None)
if async_test is True:
await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
deployment = await router.async_get_available_deployment(
model=model_group, messages=None
)
else:
router.cache.set_cache(key=cache_key, value=request_count_dict)
deployment = router.get_available_deployment(model=model_group, messages=None)
print(f"deployment: {deployment}")
assert deployment["model_info"]["id"] == "1"