fix: fix max parallel requests client

2025-04-25 18:54:30 +00:00 · 2025-03-11 18:25:48 -07:00 · 2025-03-11 18:25:48 -07:00 · e4fc6422e2
commit e4fc6422e2
parent 3ba683be88
2 changed files with 23 additions and 9 deletions
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -5346,6 +5346,13 @@ class Router:
            client = self.cache.get_cache(
                key=cache_key, local_only=True, parent_otel_span=parent_otel_span
            )
+            if client is None:
+                InitalizeOpenAISDKClient.set_max_parallel_requests_client(
+                    litellm_router_instance=self, model=deployment
+                )
+                client = self.cache.get_cache(
+                    key=cache_key, local_only=True, parent_otel_span=parent_otel_span
+                )
            return client
        elif client_type == "async":
            if kwargs.get("stream") is True: