forked from phoenix/litellm-mirror
(router_strategy/) ensure all async functions use async cache methods (#6489)
* fix router strat
* use async set / get cache in router_strategy
* add coverage for router strategy
* fix imports
* fix batch_get_cache
* use async methods for least busy
* fix least busy use async methods
* fix test_dual_cache_increment
* test async_get_available_deployment when routing_strategy="least-busy"
parent f9ba74ef87
commit 441adad3ae
8 changed files with 202 additions and 20 deletions
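The pattern the commit message describes, as a minimal sketch: async code paths must await the async cache accessors instead of calling the blocking sync ones. Everything below (DualCache, LeastBusyStrategy, the method bodies) is an illustrative stand-in, not litellm's actual implementation; only the sync/async naming convention mirrors the diff.

# Minimal sketch of the sync/async cache split this commit enforces.
# DualCache and LeastBusyStrategy are simplified stand-ins; only the
# method-naming pattern (set_cache / async_set_cache, etc.) follows
# the diff below.
from typing import Optional


class DualCache:
    """In-memory stand-in for a cache exposing sync and async accessors."""

    def __init__(self) -> None:
        self._store: dict = {}

    def get_cache(self, key: str) -> Optional[dict]:
        return self._store.get(key)

    def set_cache(self, key: str, value: dict) -> None:
        self._store[key] = value

    async def async_get_cache(self, key: str) -> Optional[dict]:
        # A real dual cache could hit Redis here without blocking the loop.
        return self._store.get(key)

    async def async_set_cache(self, key: str, value: dict) -> None:
        self._store[key] = value


class LeastBusyStrategy:
    """Picks the deployment with the fewest in-flight requests."""

    def __init__(self, cache: DualCache) -> None:
        self.cache = cache

    def get_available_deployment(self, model_group: str) -> Optional[int]:
        counts = self.cache.get_cache(f"{model_group}_request_count") or {}
        return min(counts, key=counts.get) if counts else None

    async def async_get_available_deployment(self, model_group: str) -> Optional[int]:
        # The fix: await the async accessor instead of calling get_cache(),
        # which would block the event loop on a network-backed cache.
        counts = await self.cache.async_get_cache(f"{model_group}_request_count") or {}
        return min(counts, key=counts.get) if counts else None

Keeping both entry points lets the router serve sync callers without an event loop, while the async path never blocks on cache I/O.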
@@ -65,7 +65,9 @@ def test_get_available_deployments():
 # test_get_available_deployments()
 
 
-def test_router_get_available_deployments():
+@pytest.mark.parametrize("async_test", [True, False])
+@pytest.mark.asyncio
+async def test_router_get_available_deployments(async_test):
     """
     Tests if 'get_available_deployments' returns the least busy deployment
     """
@@ -114,9 +116,14 @@ def test_router_get_available_deployments():
     deployment = "azure/chatgpt-v-2"
     request_count_dict = {1: 10, 2: 54, 3: 100}
     cache_key = f"{model_group}_request_count"
-    router.cache.set_cache(key=cache_key, value=request_count_dict)
-
-    deployment = router.get_available_deployment(model=model_group, messages=None)
+    if async_test is True:
+        await router.cache.async_set_cache(key=cache_key, value=request_count_dict)
+        deployment = await router.async_get_available_deployment(
+            model=model_group, messages=None
+        )
+    else:
+        router.cache.set_cache(key=cache_key, value=request_count_dict)
+        deployment = router.get_available_deployment(model=model_group, messages=None)
     print(f"deployment: {deployment}")
     assert deployment["model_info"]["id"] == "1"
 
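Why the test expects deployment id "1": least-busy routing picks the deployment with the smallest request count, and in the seeded dict {1: 10, 2: 54, 3: 100} that is deployment 1. A self-contained illustration of the selection rule (the min() form is an assumption about the rule, not litellm's exact code):

# Deployment 1 has the fewest outstanding requests (10 < 54 < 100),
# so the least-busy strategy should return it.
request_count_dict = {1: 10, 2: 54, 3: 100}
least_busy = min(request_count_dict, key=request_count_dict.get)
assert least_busy == 1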