fix(client_initialization_utils.py): refactor azure client init logic

2025-04-25 10:44:24 +00:00 · 2025-03-11 09:00:12 -07:00 · 2025-03-11 09:00:12 -07:00 · 8845f0947d
commit 8845f0947d
parent 7696147968
1 changed files with 69 additions and 266 deletions
--- a/litellm/router_utils/client_initalization_utils.py
+++ b/litellm/router_utils/client_initalization_utils.py
@ -194,272 +194,6 @@ class InitalizeOpenAISDKClient:
                organization_env_name = organization.replace("os.environ/", "")
                organization = get_secret_str(organization_env_name)
                litellm_params["organization"] = organization
-            azure_ad_token_provider: Optional[Callable[[], str]] = None
-            # If we have api_key, then we have higher priority
-            if not api_key and litellm_params.get("tenant_id"):
-                verbose_router_logger.debug(
-                    "Using Azure AD Token Provider for Azure Auth"
-                )
-                azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
-                    tenant_id=litellm_params.get("tenant_id"),
-                    client_id=litellm_params.get("client_id"),
-                    client_secret=litellm_params.get("client_secret"),
-                )
-            if litellm_params.get("azure_username") and litellm_params.get(
-                "azure_password"
-            ):
-                azure_ad_token_provider = get_azure_ad_token_from_username_password(
-                    azure_username=litellm_params.get("azure_username"),
-                    azure_password=litellm_params.get("azure_password"),
-                    client_id=litellm_params.get("client_id"),
-                )
-
-            if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
-                if api_base is None or not isinstance(api_base, str):
-                    filtered_litellm_params = {
-                        k: v
-                        for k, v in model["litellm_params"].items()
-                        if k != "api_key"
-                    }
-                    _filtered_model = {
-                        "model_name": model["model_name"],
-                        "litellm_params": filtered_litellm_params,
-                    }
-                    raise ValueError(
-                        f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
-                    )
-                azure_ad_token = litellm_params.get("azure_ad_token")
-                if azure_ad_token is not None:
-                    if azure_ad_token.startswith("oidc/"):
-                        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                elif (
-                     not api_key and azure_ad_token_provider is None
-                    and litellm.enable_azure_ad_token_refresh is True
-                ):
-                    try:
-                        azure_ad_token_provider = get_azure_ad_token_provider()
-                    except ValueError:
-                        verbose_router_logger.debug(
-                            "Azure AD Token Provider could not be used."
-                        )
-                if api_version is None:
-                    api_version = os.getenv(
-                        "AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
-                    )
-
-                if "gateway.ai.cloudflare.com" in api_base:
-                    if not api_base.endswith("/"):
-                        api_base += "/"
-                    azure_model = model_name.replace("azure/", "")
-                    api_base += f"{azure_model}"
-                    cache_key = f"{model_id}_async_client"
-                    _client = openai.AsyncAzureOpenAI(
-                        api_key=api_key,
-                        azure_ad_token=azure_ad_token,
-                        azure_ad_token_provider=azure_ad_token_provider,
-                        base_url=api_base,
-                        api_version=api_version,
-                        timeout=timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            api_key=api_key,
-                            azure_ad_token=azure_ad_token,
-                            azure_ad_token_provider=azure_ad_token_provider,
-                            base_url=api_base,
-                            api_version=api_version,
-                            timeout=timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-                    # streaming clients can have diff timeouts
-                    cache_key = f"{model_id}_stream_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        api_key=api_key,
-                        azure_ad_token=azure_ad_token,
-                        azure_ad_token_provider=azure_ad_token_provider,
-                        base_url=api_base,
-                        api_version=api_version,
-                        timeout=stream_timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_stream_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            api_key=api_key,
-                            azure_ad_token=azure_ad_token,
-                            azure_ad_token_provider=azure_ad_token_provider,
-                            base_url=api_base,
-                            api_version=api_version,
-                            timeout=stream_timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-                else:
-                    _api_key = api_key
-                    if _api_key is not None and isinstance(_api_key, str):
-                        # only show first 5 chars of api_key
-                        _api_key = _api_key[:8] + "*" * 15
-                    verbose_router_logger.debug(
-                        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
-                    )
-                    azure_client_params = {
-                        "api_key": api_key,
-                        "azure_endpoint": api_base,
-                        "api_version": api_version,
-                        "azure_ad_token": azure_ad_token,
-                        "azure_ad_token_provider": azure_ad_token_provider,
-                    }
-
-                    if azure_ad_token_provider is not None:
-                        azure_client_params["azure_ad_token_provider"] = (
-                            azure_ad_token_provider
-                        )
-                    from litellm.llms.azure.azure import (
-                        select_azure_base_url_or_endpoint,
-                    )
-
-                    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
-                    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
-                    azure_client_params = select_azure_base_url_or_endpoint(
-                        azure_client_params
-                    )
-
-                    cache_key = f"{model_id}_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        **azure_client_params,
-                        timeout=timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),  # type: ignore
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            **azure_client_params,
-                            timeout=timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),  # type: ignore
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr
-
-                    # streaming clients should have diff timeouts
-                    cache_key = f"{model_id}_stream_async_client"
-                    _client = openai.AsyncAzureOpenAI(  # type: ignore
-                        **azure_client_params,
-                        timeout=stream_timeout,  # type: ignore
-                        max_retries=max_retries,  # type: ignore
-                        http_client=httpx.AsyncClient(
-                            limits=httpx.Limits(
-                                max_connections=1000, max_keepalive_connections=100
-                            ),
-                            verify=litellm.ssl_verify,
-                        ),
-                    )
-                    litellm_router_instance.cache.set_cache(
-                        key=cache_key,
-                        value=_client,
-                        ttl=client_ttl,
-                        local_only=True,
-                    )  # cache for 1 hr
-
-                    if InitalizeOpenAISDKClient.should_initialize_sync_client(
-                        litellm_router_instance=litellm_router_instance
-                    ):
-                        cache_key = f"{model_id}_stream_client"
-                        _client = openai.AzureOpenAI(  # type: ignore
-                            **azure_client_params,
-                            timeout=stream_timeout,  # type: ignore
-                            max_retries=max_retries,  # type: ignore
-                            http_client=httpx.Client(
-                                limits=httpx.Limits(
-                                    max_connections=1000, max_keepalive_connections=100
-                                ),
-                                verify=litellm.ssl_verify,
-                            ),
-                        )
-                        litellm_router_instance.cache.set_cache(
-                            key=cache_key,
-                            value=_client,
-                            ttl=client_ttl,
-                            local_only=True,
-                        )  # cache for 1 hr

            else:
                _api_key = api_key  # type: ignore
@ -560,3 +294,72 @@ class InitalizeOpenAISDKClient:
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
+
+
+def initialize_azure_sdk_client(
+    litellm_params: dict,
+    api_key: Optional[str],
+    api_base: Optional[str],
+    model_name: str,
+    api_version: Optional[str],
+) -> dict:
+    """
+    Build the keyword arguments used to construct an Azure OpenAI SDK client.
+
+    Credentials are resolved in this order:
+
+    1. ``tenant_id`` + ``client_id`` + ``client_secret`` (only when no
+       ``api_key`` is given) -> token provider from
+       ``get_azure_ad_token_from_entrata_id``.
+    2. ``azure_username`` + ``azure_password`` + ``client_id`` -> token provider
+       from ``get_azure_ad_token_from_username_password``. This overrides the
+       provider from step 1 and is applied even when ``api_key`` is set.
+    3. An ``azure_ad_token`` starting with ``"oidc/"`` is exchanged through
+       ``get_azure_ad_token_from_oidc``.
+    4. Otherwise, when there is no ``api_key``, no provider was selected above
+       and ``litellm.enable_azure_ad_token_refresh`` is ``True``, the provider
+       from ``get_azure_ad_token_provider`` is used; a ``ValueError`` raised
+       there is logged at debug level and ignored.
+
+    Args:
+        litellm_params: Deployment params; the keys read here are
+            ``azure_ad_token``, ``tenant_id``, ``client_id``, ``client_secret``,
+            ``azure_username`` and ``azure_password``.
+        api_key: Azure OpenAI API key, if any.
+        api_base: Azure endpoint; placed under ``azure_endpoint`` before
+            ``select_azure_base_url_or_endpoint`` is applied. Not validated here.
+        model_name: Only used in the debug log message.
+        api_version: Azure API version. When ``None`` it falls back to the
+            ``AZURE_API_VERSION`` environment variable and then to
+            ``litellm.AZURE_DEFAULT_API_VERSION``.
+
+    Returns:
+        The client parameters as returned by
+        ``select_azure_base_url_or_endpoint``.
+    """
+    azure_ad_token = litellm_params.get("azure_ad_token")
+    tenant_id = litellm_params.get("tenant_id")
+    client_id = litellm_params.get("client_id")
+    client_secret = litellm_params.get("client_secret")
+    azure_username = litellm_params.get("azure_username")
+    azure_password = litellm_params.get("azure_password")
+
+    azure_ad_token_provider: Optional[Callable[[], str]] = None
+    # An explicit api_key has higher priority than service-principal auth.
+    if not api_key and tenant_id and client_id and client_secret:
+        verbose_router_logger.debug("Using Azure AD Token Provider for Azure Auth")
+        azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
+            tenant_id=tenant_id,
+            client_id=client_id,
+            client_secret=client_secret,
+        )
+    # Username/password auth is not gated on api_key and replaces any provider
+    # chosen above.
+    if azure_username and azure_password and client_id:
+        azure_ad_token_provider = get_azure_ad_token_from_username_password(
+            azure_username=azure_username,
+            azure_password=azure_password,
+            client_id=client_id,
+        )
+
+    if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
+        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
+    elif (
+        not api_key
+        and azure_ad_token_provider is None
+        and litellm.enable_azure_ad_token_refresh is True
+    ):
+        # NOTE: a static (non-"oidc/") azure_ad_token does not skip this branch;
+        # a default provider is still requested alongside it.
+        try:
+            azure_ad_token_provider = get_azure_ad_token_provider()
+        except ValueError:
+            # Best effort: continue without a provider.
+            verbose_router_logger.debug("Azure AD Token Provider could not be used.")
+
+    if api_version is None:
+        api_version = os.getenv("AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION)
+
+    masked_api_key = api_key
+    if isinstance(masked_api_key, str):
+        # only show the first 8 chars of api_key in logs
+        masked_api_key = masked_api_key[:8] + "*" * 15
+    verbose_router_logger.debug(
+        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{masked_api_key}"
+    )
+
+    azure_client_params = {
+        "api_key": api_key,
+        "azure_endpoint": api_base,
+        "api_version": api_version,
+        "azure_ad_token": azure_ad_token,
+        "azure_ad_token_provider": azure_ad_token_provider,
+    }
+
+    # Imported at call time rather than at module level, as in the original code.
+    from litellm.llms.azure.azure import select_azure_base_url_or_endpoint
+
+    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
+    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
+    return select_azure_base_url_or_endpoint(azure_client_params)