test(test_router_max_parallel_requests.py): more extensive testing for setting max parallel requests

This commit is contained in:
Krrish Dholakia 2024-04-20 12:56:54 -07:00
parent a9108cbdc2
commit 9f6e90e17d
3 changed files with 69 additions and 4 deletions

View file

@@ -2562,7 +2562,7 @@ class Router:
"""
model_id = deployment["model_info"]["id"]
if client_type == "max_parallel_requests":
-cache_key = "{}_max_parallel_requests".format(model_id)
+cache_key = "{}_max_parallel_requests_client".format(model_id)
client = self.cache.get_cache(key=cache_key, local_only=True)
return client
elif client_type == "async":