refactor(azure.py): refactor acompletion to use base azure sdk client

2025-04-26 19:24:27 +00:00 · 2025-03-11 13:59:13 -07:00 · 2025-03-11 13:59:13 -07:00 · 858d9005a2
commit 858d9005a2
parent d68218a0fe
3 changed files with 162 additions and 90 deletions
--- a/litellm/llms/azure/azure.py
+++ b/litellm/llms/azure/azure.py
@ -31,6 +31,7 @@ from ...types.llms.openai import HttpxBinaryResponseContent
 from ..base import BaseLLM
 from .common_utils import (
    AzureOpenAIError,
+    BaseAzureLLM,
    get_azure_ad_token_from_oidc,
    process_azure_headers,
    select_azure_base_url_or_endpoint,
@ -120,7 +121,7 @@ def _check_dynamic_azure_params(
    return False


-class AzureChatCompletion(BaseLLM):
+class AzureChatCompletion(BaseAzureLLM, BaseLLM):
    def __init__(self) -> None:
        super().__init__()

@ -348,6 +349,7 @@ class AzureChatCompletion(BaseLLM):
                        logging_obj=logging_obj,
                        max_retries=max_retries,
                        convert_tool_call_to_json_mode=json_mode,
+                        litellm_params=litellm_params,
                    )
            elif "stream" in optional_params and optional_params["stream"] is True:
                return self.streaming(
@ -476,29 +478,18 @@ class AzureChatCompletion(BaseLLM):
        azure_ad_token_provider: Optional[Callable] = None,
        convert_tool_call_to_json_mode: Optional[bool] = None,
        client=None,  # this is the AsyncAzureOpenAI
+        litellm_params: Optional[dict] = None,
    ):
        response = None
        try:
            # init AzureOpenAI Client
-            azure_client_params = {
-                "api_version": api_version,
-                "azure_endpoint": api_base,
-                "azure_deployment": model,
-                "http_client": litellm.aclient_session,
-                "max_retries": max_retries,
-                "timeout": timeout,
-            }
-            azure_client_params = select_azure_base_url_or_endpoint(
-                azure_client_params=azure_client_params
+            azure_client_params = self.initialize_azure_sdk_client(
+                litellm_params=litellm_params or {},
+                api_key=api_key,
+                api_base=api_base,
+                model_name=model,
+                api_version=api_version,
            )
-            if api_key is not None:
-                azure_client_params["api_key"] = api_key
-            elif azure_ad_token is not None:
-                if azure_ad_token.startswith("oidc/"):
-                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
-                azure_client_params["azure_ad_token"] = azure_ad_token
-            elif azure_ad_token_provider is not None:
-                azure_client_params["azure_ad_token_provider"] = azure_ad_token_provider

            # setting Azure client
            if client is None or dynamic_params:
--- a/litellm/llms/azure/common_utils.py
+++ b/litellm/llms/azure/common_utils.py
@ -275,13 +275,16 @@ def select_azure_base_url_or_endpoint(azure_client_params: dict):
    return azure_client_params


+class BaseAzureLLM:
    def initialize_azure_sdk_client(
+        self,
        litellm_params: dict,
        api_key: Optional[str],
        api_base: Optional[str],
        model_name: str,
        api_version: Optional[str],
    ) -> dict:
+
        azure_ad_token_provider: Optional[Callable[[], str]] = None
        # If we have api_key, then we have higher priority
        azure_ad_token = litellm_params.get("azure_ad_token")
@ -316,7 +319,9 @@ def initialize_azure_sdk_client(
            except ValueError:
                verbose_logger.debug("Azure AD Token Provider could not be used.")
        if api_version is None:
-        api_version = os.getenv("AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION)
+            api_version = os.getenv(
+                "AZURE_API_VERSION", litellm.AZURE_DEFAULT_API_VERSION
+            )

        _api_key = api_key
        if _api_key is not None and isinstance(_api_key, str):
--- a/tests/litellm/llms/azure/test_azure_common_utils.py
+++ b/tests/litellm/llms/azure/test_azure_common_utils.py
@ -10,7 +10,8 @@ sys.path.insert(
    0, os.path.abspath("../../../..")
 )  # Adds the parent directory to the system path
 import litellm
-from litellm.llms.azure.common_utils import initialize_azure_sdk_client
+from litellm.llms.azure.common_utils import BaseAzureLLM
+from litellm.types.utils import CallTypes


 # Mock the necessary dependencies
@ -58,7 +59,7 @@ def setup_mocks():

 def test_initialize_with_api_key(setup_mocks):
    # Test with api_key provided
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
@ -76,7 +77,7 @@ def test_initialize_with_api_key(setup_mocks):

 def test_initialize_with_tenant_credentials(setup_mocks):
    # Test with tenant_id, client_id, and client_secret provided
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={
            "tenant_id": "test-tenant-id",
            "client_id": "test-client-id",
@ -103,7 +104,7 @@ def test_initialize_with_tenant_credentials(setup_mocks):

 def test_initialize_with_username_password(setup_mocks):
    # Test with azure_username, azure_password, and client_id provided
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={
            "azure_username": "test-username",
            "azure_password": "test-password",
@ -128,7 +129,7 @@ def test_initialize_with_username_password(setup_mocks):

 def test_initialize_with_oidc_token(setup_mocks):
    # Test with azure_ad_token that starts with "oidc/"
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token": "oidc/test-token"},
        api_key=None,
        api_base="https://test.openai.azure.com",
@ -148,7 +149,7 @@ def test_initialize_with_enable_token_refresh(setup_mocks):
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True

    # Test with token refresh enabled
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
@ -169,7 +170,7 @@ def test_initialize_with_token_refresh_error(setup_mocks):
    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")

    # Test with token refresh enabled but raising error
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
@ -186,7 +187,7 @@ def test_initialize_with_token_refresh_error(setup_mocks):
 def test_api_version_from_env_var(setup_mocks):
    # Test api_version from environment variable
    with patch.dict(os.environ, {"AZURE_API_VERSION": "2023-07-01"}):
-        result = initialize_azure_sdk_client(
+        result = BaseAzureLLM().initialize_azure_sdk_client(
            litellm_params={},
            api_key="test-api-key",
            api_base="https://test.openai.azure.com",
@ -200,7 +201,7 @@ def test_api_version_from_env_var(setup_mocks):

 def test_select_azure_base_url_called(setup_mocks):
    # Test that select_azure_base_url_or_endpoint is called
-    result = initialize_azure_sdk_client(
+    result = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
@ -210,3 +211,78 @@ def test_select_azure_base_url_called(setup_mocks):

    # Verify that select_azure_base_url_or_endpoint was called
    setup_mocks["select_url"].assert_called_once()
+
+
+@pytest.mark.parametrize(
+    "call_type",
+    [
+        CallTypes.acompletion,
+        CallTypes.atext_completion,
+        CallTypes.aembedding,
+        CallTypes.arerank,
+        CallTypes.atranscription,
+    ],
+)
+@pytest.mark.asyncio
+async def test_ensure_initialize_azure_sdk_client_always_used(call_type):
+    from litellm.router import Router
+
+    # Create a router with an Azure model
+    azure_model_name = "azure/chatgpt-v-2"
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": azure_model_name,
+                    "api_key": "test-api-key",
+                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
+                    "api_base": os.getenv(
+                        "AZURE_API_BASE", "https://test.openai.azure.com"
+                    ),
+                },
+            }
+        ],
+    )
+
+    # Prepare test input based on call type
+    test_inputs = {
+        "acompletion": {
+            "messages": [{"role": "user", "content": "Hello, how are you?"}]
+        },
+        "atext_completion": {"prompt": "Hello, how are you?"},
+        "aimage_generation": {"prompt": "Hello, how are you?"},
+        "aembedding": {"input": "Hello, how are you?"},
+        "arerank": {"input": "Hello, how are you?"},
+        "atranscription": {"file": "path/to/file"},
+    }
+
+    # Get appropriate input for this call type
+    input_kwarg = test_inputs.get(call_type.value, {})
+
+    # Patch initialize_azure_sdk_client on litellm.main.azure_chat_completions
+    with patch(
+        "litellm.main.azure_chat_completions.initialize_azure_sdk_client"
+    ) as mock_init_azure:
+        # Call the appropriate router method. Nothing else is mocked here, so any
+        # exception raised by the call is caught and printed below.
+        try:
+            await getattr(router, call_type.value)(
+                model="gpt-3.5-turbo",
+                **input_kwarg,
+                num_retries=0,
+            )
+        except Exception as e:
+            print(e)
+
+        # Verify initialize_azure_sdk_client was called exactly once
+        mock_init_azure.assert_called_once()
+
+        # Collect the recorded calls so their keyword arguments can be checked
+        calls = mock_init_azure.call_args_list
+        azure_calls = [call for call in calls]
+
+        # Verify every recorded call passed api_key and api_base as keyword arguments
+        for call in azure_calls:
+            assert "api_key" in call.kwargs, "api_key not found in parameters"
+            assert "api_base" in call.kwargs, "api_base not found in parameters"