fix: fix max parallel requests client

Krrish Dholakia 2025-03-11 18:25:48 -07:00
parent 3ba683be88
commit e4fc6422e2
2 changed files with 23 additions and 9 deletions

```diff
@@ -5346,6 +5346,13 @@ class Router:
             client = self.cache.get_cache(
                 key=cache_key, local_only=True, parent_otel_span=parent_otel_span
             )
+            if client is None:
+                InitalizeOpenAISDKClient.set_max_parallel_requests_client(
+                    litellm_router_instance=self, model=deployment
+                )
+                client = self.cache.get_cache(
+                    key=cache_key, local_only=True, parent_otel_span=parent_otel_span
+                )
             return client
         elif client_type == "async":
             if kwargs.get("stream") is True:
```
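Before this fix, the getter assumed `set_client()` had already populated the cache entry, so a missing max-parallel-requests client silently came back as `None`. The fix applies a lazy-initialization pattern: on a cache miss, initialize the client, then re-read it from the cache. A minimal standalone sketch of that pattern; the `LocalCache` class and `get_or_initialize` helper below are illustrative stand-ins, not litellm internals:

```python
from typing import Any, Callable, Dict, Optional


class LocalCache:
    """Toy in-process cache standing in for the router's cache layer."""

    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}

    def get_cache(self, key: str) -> Optional[Any]:
        return self._store.get(key)

    def set_cache(self, key: str, value: Any) -> None:
        self._store[key] = value


def get_or_initialize(
    cache: LocalCache, key: str, initialize: Callable[[], Any]
) -> Any:
    """On a cache miss, build the value, store it, and read it back;
    the same shape as the router fix above."""
    client = cache.get_cache(key=key)
    if client is None:
        cache.set_cache(key=key, value=initialize())
        client = cache.get_cache(key=key)
    return client
```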

```diff
@@ -54,18 +54,11 @@ class InitalizeOpenAISDKClient:
         return True

     @staticmethod
-    def set_client(  # noqa: PLR0915
+    def set_max_parallel_requests_client(
         litellm_router_instance: LitellmRouter, model: dict
     ):
-        """
-        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
-        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
-        """
-        client_ttl = litellm_router_instance.client_ttl
         litellm_params = model.get("litellm_params", {})
-        model_name = litellm_params.get("model")
         model_id = model["model_info"]["id"]
-        # ### IF RPM SET - initialize a semaphore ###
         rpm = litellm_params.get("rpm", None)
         tpm = litellm_params.get("tpm", None)
         max_parallel_requests = litellm_params.get("max_parallel_requests", None)
@@ -84,6 +77,19 @@ class InitalizeOpenAISDKClient:
                 local_only=True,
             )

+    @staticmethod
+    def set_client(  # noqa: PLR0915
+        litellm_router_instance: LitellmRouter, model: dict
+    ):
+        """
+        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
+        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
+        """
+        client_ttl = litellm_router_instance.client_ttl
+        litellm_params = model.get("litellm_params", {})
+        model_name = litellm_params.get("model")
+        model_id = model["model_info"]["id"]
+
         #### for OpenAI / Azure we need to initalize the Client for High Traffic ########
         custom_llm_provider = litellm_params.get("custom_llm_provider")
         custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
```
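These two hunks extract the semaphore setup out of the monolithic `set_client` into the standalone `set_max_parallel_requests_client`, so the router's getter can call it on demand. Per the docstring, the semaphore caps concurrent requests per deployment (litellm issue #2994). A hedged sketch of what that initialization plausibly looks like; the cache-key format, the `asyncio.Semaphore` choice, and the shape of the `cache` parameter are assumptions, not taken from this diff:

```python
import asyncio
from typing import Any, Optional


def set_max_parallel_requests_sketch(
    model_id: str,
    max_parallel_requests: Optional[int],
    cache: Any,  # assumed to expose set_cache(key=..., value=..., local_only=...)
) -> None:
    """Store one asyncio.Semaphore per deployment so the router can cap
    in-flight requests. The key format below is hypothetical."""
    if max_parallel_requests is None:
        return
    semaphore = asyncio.Semaphore(max_parallel_requests)
    cache.set_cache(
        key=f"{model_id}_max_parallel_requests_client",  # hypothetical key
        value=semaphore,
        local_only=True,
    )
```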
```diff
@@ -233,7 +239,8 @@ class InitalizeOpenAISDKClient:
             if azure_ad_token.startswith("oidc/"):
                 azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
             elif (
-                not api_key and azure_ad_token_provider is None
+                not api_key
+                and azure_ad_token_provider is None
                 and litellm.enable_azure_ad_token_refresh is True
             ):
                 try:
```
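For context on when the new code path runs: the semaphore client only matters for deployments that set `max_parallel_requests` (or `rpm`/`tpm`) in their `litellm_params`, as read in the hunk above. A configuration sketch using the Router's `model_list` API, with placeholder model names and credentials:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # placeholder alias
            "litellm_params": {
                "model": "azure/my-deployment",            # placeholder
                "api_key": "sk-...",                       # placeholder
                "api_base": "https://example.azure.com",   # placeholder
                "max_parallel_requests": 10,  # caps concurrent requests to this deployment
                "rpm": 60,
            },
        }
    ]
)
```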