fix: initialize the max parallel requests client when it is missing from the cache

This commit is contained in:
Krrish Dholakia 2025-03-11 18:25:48 -07:00
parent c1ec1a3ed6
commit 507504906e
2 changed files with 23 additions and 9 deletions

View file

@@ -5346,6 +5346,13 @@ class Router:
client = self.cache.get_cache(
key=cache_key, local_only=True, parent_otel_span=parent_otel_span
)
if client is None:
InitalizeOpenAISDKClient.set_max_parallel_requests_client(
litellm_router_instance=self, model=deployment
)
client = self.cache.get_cache(
key=cache_key, local_only=True, parent_otel_span=parent_otel_span
)
return client
elif client_type == "async":
if kwargs.get("stream") is True: