test(test_router_max_parallel_requests.py): more extensive testing for setting max parallel requests

2024-04-20 12:56:54 -07:00 · 2024-04-20 12:56:54 -07:00 · 0f69f0b44e
commit 0f69f0b44e
parent 7aa737cf10
3 changed files with 69 additions and 4 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2562,7 +2562,7 @@ class Router:
        """
        model_id = deployment["model_info"]["id"]
        if client_type == "max_parallel_requests":
-            cache_key = "{}_max_parallel_requests".format(model_id)
+            cache_key = "{}_max_parallel_requests_client".format(model_id)
            client = self.cache.get_cache(key=cache_key, local_only=True)
            return client
        elif client_type == "async":