fix: initialize the max parallel requests client when it is missing from the cache

2025-04-25 18:54:30 +00:00 · 2025-03-11 18:25:48 -07:00 · 2025-03-11 18:25:48 -07:00 · 507504906e
commit 507504906e
parent c1ec1a3ed6
2 changed files with 23 additions and 9 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -5346,6 +5346,13 @@ class Router:
            client = self.cache.get_cache(
                key=cache_key, local_only=True, parent_otel_span=parent_otel_span
            )
+            if client is None:
+                InitalizeOpenAISDKClient.set_max_parallel_requests_client(
+                    litellm_router_instance=self, model=deployment
+                )
+                client = self.cache.get_cache(
+                    key=cache_key, local_only=True, parent_otel_span=parent_otel_span
+                )
            return client
        elif client_type == "async":
            if kwargs.get("stream") is True: