Fix azure tenant id check from env var + response_format check on api_version 2025+ (#9993)

* fix(azure/common_utils.py): check for azure tenant id, client id, client secret in env var

  Fixes https://github.com/BerriAI/litellm/issues/9598#issuecomment-2801966027

* fix(azure/gpt_transformation.py): fix passing response_format to azure when api year = 2025

  Fixes https://github.com/BerriAI/litellm/issues/9703

* test: monkeypatch azure api version in test

* test: update testing

* test: fix test

* test: update test

* docs(config_settings.md): document env vars
2025-04-24 18:24:20 +00:00 · 2025-04-14 22:02:35 -07:00 · 2025-04-14 22:02:35 -07:00 · 8faf56922c
commit 8faf56922c
parent ce2595f56a
5 changed files with 84 additions and 15 deletions
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@ -323,6 +323,9 @@ router_settings:
 | AZURE_AUTHORITY_HOST | Azure authority host URL
 | AZURE_CLIENT_ID | Client ID for Azure services
 | AZURE_CLIENT_SECRET | Client secret for Azure services
+| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
+| AZURE_USERNAME | Username for Azure services. Used together with AZURE_PASSWORD (and AZURE_CLIENT_ID) to obtain an Azure AD token via the basic username/password workflow
+| AZURE_PASSWORD | Password for Azure services. Used together with AZURE_USERNAME (and AZURE_CLIENT_ID) to obtain an Azure AD token via the basic username/password workflow
 | AZURE_FEDERATED_TOKEN_FILE | File path to Azure federated token
 | AZURE_KEY_VAULT_URI | URI for Azure Key Vault
 | AZURE_STORAGE_ACCOUNT_KEY | The Azure Storage Account Key to use for Authentication to Azure Blob Storage logging
@ -331,7 +334,7 @@ router_settings:
 | AZURE_STORAGE_TENANT_ID | The Application Tenant ID to use for Authentication to Azure Blob Storage logging
 | AZURE_STORAGE_CLIENT_ID | The Application Client ID to use for Authentication to Azure Blob Storage logging
 | AZURE_STORAGE_CLIENT_SECRET | The Application Client Secret to use for Authentication to Azure Blob Storage logging
-| AZURE_TENANT_ID | Tenant ID for Azure Active Directory
+
 | BERRISPEND_ACCOUNT_ID | Account ID for BerriSpend service
 | BRAINTRUST_API_KEY | API key for Braintrust integration
 | CIRCLE_OIDC_TOKEN | OpenID Connect token for CircleCI
--- a/litellm/llms/azure/chat/gpt_transformation.py
+++ b/litellm/llms/azure/chat/gpt_transformation.py
@ -125,14 +125,22 @@ class AzureOpenAIConfig(BaseConfig):
    ) -> bool:
        """
        - check if api_version is supported for response_format
+        - returns True if the API version is equal to or newer than the supported version
        """
+        api_year = int(api_version_year)
+        api_month = int(api_version_month)
+        supported_year = int(API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT)
+        supported_month = int(API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT)

-        is_supported = (
-            int(api_version_year) <= API_VERSION_YEAR_SUPPORTED_RESPONSE_FORMAT
-            and int(api_version_month) >= API_VERSION_MONTH_SUPPORTED_RESPONSE_FORMAT
-        )
-
-        return is_supported
+        # If the year is greater than supported year, it's definitely supported
+        if api_year > supported_year:
+            return True
+        # If the year is less than supported year, it's not supported
+        elif api_year < supported_year:
+            return False
+        # If same year, check if month is >= supported month
+        else:
+            return api_month >= supported_month

    def map_openai_params(
        self,
@ -202,6 +210,7 @@ class AzureOpenAIConfig(BaseConfig):
                    is_response_format_supported_api_version
                    and _is_response_format_supported_model
                )
+
                optional_params = self._add_response_format_to_tools(
                    optional_params=optional_params,
                    value=value,
--- a/litellm/llms/azure/common_utils.py
+++ b/litellm/llms/azure/common_utils.py
@ -309,21 +309,30 @@ class BaseAzureLLM(BaseOpenAILLM):
        azure_ad_token_provider: Optional[Callable[[], str]] = None
        # If we have api_key, then we have higher priority
        azure_ad_token = litellm_params.get("azure_ad_token")
-        tenant_id = litellm_params.get("tenant_id")
-        client_id = litellm_params.get("client_id")
-        client_secret = litellm_params.get("client_secret")
-        azure_username = litellm_params.get("azure_username")
-        azure_password = litellm_params.get("azure_password")
+        tenant_id = litellm_params.get("tenant_id", os.getenv("AZURE_TENANT_ID"))
+        client_id = litellm_params.get("client_id", os.getenv("AZURE_CLIENT_ID"))
+        client_secret = litellm_params.get(
+            "client_secret", os.getenv("AZURE_CLIENT_SECRET")
+        )
+        azure_username = litellm_params.get(
+            "azure_username", os.getenv("AZURE_USERNAME")
+        )
+        azure_password = litellm_params.get(
+            "azure_password", os.getenv("AZURE_PASSWORD")
+        )
        max_retries = litellm_params.get("max_retries")
        timeout = litellm_params.get("timeout")
        if not api_key and tenant_id and client_id and client_secret:
-            verbose_logger.debug("Using Azure AD Token Provider for Azure Auth")
+            verbose_logger.debug(
+                "Using Azure AD Token Provider from Entrata ID for Azure Auth"
+            )
            azure_ad_token_provider = get_azure_ad_token_from_entrata_id(
                tenant_id=tenant_id,
                client_id=client_id,
                client_secret=client_secret,
            )
        if azure_username and azure_password and client_id:
+            verbose_logger.debug("Using Azure Username and Password for Azure Auth")
            azure_ad_token_provider = get_azure_ad_token_from_username_password(
                azure_username=azure_username,
                azure_password=azure_password,
@ -331,12 +340,16 @@ class BaseAzureLLM(BaseOpenAILLM):
            )

        if azure_ad_token is not None and azure_ad_token.startswith("oidc/"):
+            verbose_logger.debug("Using Azure OIDC Token for Azure Auth")
            azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
        elif (
            not api_key
            and azure_ad_token_provider is None
            and litellm.enable_azure_ad_token_refresh is True
        ):
+            verbose_logger.debug(
+                "Using Azure AD token provider based on Service Principal with Secret workflow for Azure Auth"
+            )
            try:
                azure_ad_token_provider = get_azure_ad_token_provider()
            except ValueError:
--- a/tests/litellm/llms/azure/test_azure_common_utils.py
+++ b/tests/litellm/llms/azure/test_azure_common_utils.py
@ -78,6 +78,33 @@ def test_initialize_with_api_key(setup_mocks):
    assert result["azure_ad_token"] is None


+def test_initialize_with_tenant_credentials_env_var(setup_mocks, monkeypatch):
+    monkeypatch.setenv("AZURE_TENANT_ID", "test-tenant-id")
+    monkeypatch.setenv("AZURE_CLIENT_ID", "test-client-id")
+    monkeypatch.setenv("AZURE_CLIENT_SECRET", "test-client-secret")
+
+    result = BaseAzureLLM().initialize_azure_sdk_client(
+        litellm_params={},
+        api_key=None,
+        api_base="https://test.openai.azure.com",
+        model_name="gpt-4",
+        api_version=None,
+        is_async=False,
+    )
+
+    # Verify that get_azure_ad_token_from_entrata_id was called
+    setup_mocks["entrata_token"].assert_called_once_with(
+        tenant_id="test-tenant-id",
+        client_id="test-client-id",
+        client_secret="test-client-secret",
+    )
+
+    # Verify expected result
+    assert result["api_key"] is None
+    assert result["azure_endpoint"] == "https://test.openai.azure.com"
+    assert "azure_ad_token_provider" in result
+
+
 def test_initialize_with_tenant_credentials(setup_mocks):
    # Test with tenant_id, client_id, and client_secret provided
    result = BaseAzureLLM().initialize_azure_sdk_client(
@ -150,8 +177,12 @@ def test_initialize_with_oidc_token(setup_mocks):
    assert result["azure_ad_token"] == "mock-oidc-token"


-def test_initialize_with_enable_token_refresh(setup_mocks):
+def test_initialize_with_enable_token_refresh(setup_mocks, monkeypatch):
+    litellm._turn_on_debug()
    # Enable token refresh
+    monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
+    monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
+    monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True

    # Test with token refresh enabled
@ -171,8 +202,11 @@ def test_initialize_with_enable_token_refresh(setup_mocks):
    assert "azure_ad_token_provider" in result


-def test_initialize_with_token_refresh_error(setup_mocks):
+def test_initialize_with_token_refresh_error(setup_mocks, monkeypatch):
    # Enable token refresh but make it raise an error
+    monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
+    monkeypatch.delenv("AZURE_CLIENT_SECRET", raising=False)
+    monkeypatch.delenv("AZURE_TENANT_ID", raising=False)
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")

--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@ -1449,3 +1449,13 @@ def test_anthropic_unified_reasoning_content(model, provider):
    )
    assert optional_params["thinking"] == {"type": "enabled", "budget_tokens": 4096}

+
+
+def test_azure_response_format(monkeypatch):
+    monkeypatch.setenv("AZURE_API_VERSION", "2025-02-01")
+    optional_params = get_optional_params(
+        model="azure/gpt-4o-mini",
+        custom_llm_provider="azure",
+        response_format={"type": "json_object"},
+    )
+    assert optional_params["response_format"] == {"type": "json_object"}