# litellm-mirror/tests/litellm/llms/azure/test_azure_common_utils.py
# Snapshot: 2025-04-11 21:50:17 -07:00 (644 lines, 21 KiB, Python)
import json
import os
import sys
import traceback
from typing import Callable, Optional
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../../..")
) # Adds the parent directory to the system path
import litellm
from litellm.llms.azure.common_utils import BaseAzureLLM
from litellm.types.utils import CallTypes
# Mock every external dependency that initialize_azure_sdk_client touches so
# each test can assert on how the helpers were invoked.
@pytest.fixture
def setup_mocks():
    with patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_entrata_id"
    ) as entrata_patch, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_username_password"
    ) as user_pass_patch, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_from_oidc"
    ) as oidc_patch, patch(
        "litellm.llms.azure.common_utils.get_azure_ad_token_provider"
    ) as provider_patch, patch(
        "litellm.llms.azure.common_utils.litellm"
    ) as litellm_patch, patch(
        "litellm.llms.azure.common_utils.verbose_logger"
    ) as logger_patch, patch(
        "litellm.llms.azure.common_utils.select_azure_base_url_or_endpoint"
    ) as select_url_patch:
        # Defaults: AD token refresh disabled, fixed fallback API version.
        litellm_patch.AZURE_DEFAULT_API_VERSION = "2023-05-15"
        litellm_patch.enable_azure_ad_token_refresh = False
        # Each token helper hands back a recognizable sentinel value.
        entrata_patch.return_value = lambda: "mock-entrata-token"
        user_pass_patch.return_value = lambda: "mock-username-password-token"
        oidc_patch.return_value = "mock-oidc-token"
        provider_patch.return_value = lambda: "mock-default-token"
        # URL selection passes the client params through unchanged.
        select_url_patch.side_effect = (
            lambda azure_client_params, **kwargs: azure_client_params
        )
        yield {
            "entrata_token": entrata_patch,
            "username_password_token": user_pass_patch,
            "oidc_token": oidc_patch,
            "token_provider": provider_patch,
            "litellm": litellm_patch,
            "logger": logger_patch,
            "select_url": select_url_patch,
        }
def test_initialize_with_api_key(setup_mocks):
    """A static api_key should pass through and no Azure AD token be set."""
    client_params = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version="2023-06-01",
        is_async=False,
    )
    # The static credentials flow straight through...
    assert client_params["api_key"] == "test-api-key"
    assert client_params["azure_endpoint"] == "https://test.openai.azure.com"
    assert client_params["api_version"] == "2023-06-01"
    # ...and no Azure AD token gets resolved.
    assert "azure_ad_token" in client_params
    assert client_params["azure_ad_token"] is None
def test_initialize_with_tenant_credentials(setup_mocks):
    """tenant_id/client_id/client_secret should trigger the Entra ID helper."""
    client_params = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={
            "tenant_id": "test-tenant-id",
            "client_id": "test-client-id",
            "client_secret": "test-client-secret",
        },
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )
    # The Entra ID helper must receive the tenant credentials verbatim.
    setup_mocks["entrata_token"].assert_called_once_with(
        tenant_id="test-tenant-id",
        client_id="test-client-id",
        client_secret="test-client-secret",
    )
    # No static key; instead a token provider is wired into the params.
    assert client_params["api_key"] is None
    assert client_params["azure_endpoint"] == "https://test.openai.azure.com"
    assert "azure_ad_token_provider" in client_params
def test_initialize_with_username_password(setup_mocks):
    """azure_username/azure_password/client_id should use the username-password helper."""
    client_params = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={
            "azure_username": "test-username",
            "azure_password": "test-password",
            "client_id": "test-client-id",
        },
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )
    # The helper must receive the credentials verbatim...
    setup_mocks["username_password_token"].assert_called_once_with(
        azure_username="test-username",
        azure_password="test-password",
        client_id="test-client-id",
    )
    # ...and a token provider must end up in the client params.
    assert "azure_ad_token_provider" in client_params
def test_initialize_with_oidc_token(setup_mocks):
    """An azure_ad_token prefixed with "oidc/" must be exchanged via the OIDC helper."""
    client_params = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={"azure_ad_token": "oidc/test-token"},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )
    # The raw "oidc/..." value goes to the exchange helper...
    setup_mocks["oidc_token"].assert_called_once_with("oidc/test-token")
    # ...and the exchanged token lands in the client params.
    assert client_params["azure_ad_token"] == "mock-oidc-token"
def test_initialize_with_enable_token_refresh(setup_mocks):
    """With token refresh enabled, the default AD token provider should be used."""
    # Flip the refresh flag on the mocked litellm module.
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
    client_params = BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )
    # The default provider is consulted and wired into the client params.
    setup_mocks["token_provider"].assert_called_once()
    assert "azure_ad_token_provider" in client_params
def test_initialize_with_token_refresh_error(setup_mocks):
    """A failing AD token provider must be logged, not raised to the caller.

    Enables token refresh, makes the provider raise, then asserts the error
    path only emits a debug log.
    """
    setup_mocks["litellm"].enable_azure_ad_token_refresh = True
    setup_mocks["token_provider"].side_effect = ValueError("Token provider error")
    # Return value is irrelevant here; the call just must not raise.
    BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key=None,
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version=None,
        is_async=False,
    )
    # Verify the failure was swallowed and logged.
    setup_mocks["logger"].debug.assert_any_call(
        "Azure AD Token Provider could not be used."
    )
def test_api_version_from_env_var(setup_mocks):
    """When api_version is None, the AZURE_API_VERSION env var should win."""
    with patch.dict(os.environ, {"AZURE_API_VERSION": "2023-07-01"}):
        client_params = BaseAzureLLM().initialize_azure_sdk_client(
            litellm_params={},
            api_key="test-api-key",
            api_base="https://test.openai.azure.com",
            model_name="gpt-4",
            api_version=None,
            is_async=False,
        )
    # The environment-provided version is picked up.
    assert client_params["api_version"] == "2023-07-01"
def test_select_azure_base_url_called(setup_mocks):
    """Client initialization must route through select_azure_base_url_or_endpoint.

    The return value is irrelevant here; we only care that the URL-selection
    helper participates in building the client params.
    """
    BaseAzureLLM().initialize_azure_sdk_client(
        litellm_params={},
        api_key="test-api-key",
        api_base="https://test.openai.azure.com",
        model_name="gpt-4",
        api_version="2023-06-01",
        is_async=False,
    )
    # Verify that select_azure_base_url_or_endpoint was called
    setup_mocks["select_url"].assert_called_once()
@pytest.mark.parametrize(
    "call_type",
    [
        call_type
        for call_type in CallTypes.__members__.values()
        if call_type.name.startswith("a")
        and call_type.name
        not in [
            "amoderation",
            "arerank",
            "arealtime",
            "anthropic_messages",
            "add_message",
            "arun_thread_stream",
            "aresponses",
        ]
    ],
)
@pytest.mark.asyncio
async def test_ensure_initialize_azure_sdk_client_always_used(call_type):
    """Every supported async Router call type on an `azure/` model must route
    through initialize_azure_sdk_client, forwarding litellm_params such as
    azure_ad_token.

    Which initialize_azure_sdk_client implementation is patched depends on
    the litellm subsystem serving the call type (batches, assistants, files,
    rerank, or the base Azure LLM class).
    """
    from litellm.router import Router

    # Router with a single Azure deployment behind the "gpt-3.5-turbo" alias.
    azure_model_name = "azure/chatgpt-v-2"
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": azure_model_name,
                    "api_key": "test-api-key",
                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
                    "api_base": os.getenv(
                        "AZURE_API_BASE", "https://test.openai.azure.com"
                    ),
                },
            }
        ],
    )
    # Minimal per-call-type keyword arguments; unlisted call types get none.
    test_inputs = {
        "acompletion": {
            "messages": [{"role": "user", "content": "Hello, how are you?"}]
        },
        "atext_completion": {"prompt": "Hello, how are you?"},
        "aimage_generation": {"prompt": "Hello, how are you?"},
        "aembedding": {"input": "Hello, how are you?"},
        "arerank": {"input": "Hello, how are you?"},
        "atranscription": {"file": "path/to/file"},
        "aspeech": {"input": "Hello, how are you?", "voice": "female"},
        "acreate_batch": {
            "completion_window": 10,
            "endpoint": "https://test.openai.azure.com",
            "input_file_id": "123",
        },
        "aretrieve_batch": {"batch_id": "123"},
        "aget_assistants": {"custom_llm_provider": "azure"},
        "acreate_assistants": {"custom_llm_provider": "azure"},
        "adelete_assistant": {"custom_llm_provider": "azure", "assistant_id": "123"},
        "acreate_thread": {"custom_llm_provider": "azure"},
        "aget_thread": {"custom_llm_provider": "azure", "thread_id": "123"},
        "a_add_message": {
            "custom_llm_provider": "azure",
            "thread_id": "123",
            "role": "user",
            "content": "Hello, how are you?",
        },
        "aget_messages": {"custom_llm_provider": "azure", "thread_id": "123"},
        "arun_thread": {
            "custom_llm_provider": "azure",
            "assistant_id": "123",
            "thread_id": "123",
        },
        "acreate_file": {
            "custom_llm_provider": "azure",
            "file": MagicMock(),
            "purpose": "assistants",
        },
        "afile_content": {
            "custom_llm_provider": "azure",
            "file_id": "123",
        },
        "afile_delete": {
            "custom_llm_provider": "azure",
            "file_id": "123",
        },
    }
    input_kwarg = test_inputs.get(call_type.value, {})

    # Resolve which subsystem's initialize_azure_sdk_client to patch.
    if call_type == CallTypes.arerank:
        patch_target = (
            "litellm.rerank_api.main.azure_rerank.initialize_azure_sdk_client"
        )
    elif call_type in (CallTypes.acreate_batch, CallTypes.aretrieve_batch):
        patch_target = (
            "litellm.batches.main.azure_batches_instance.initialize_azure_sdk_client"
        )
    elif call_type in (
        CallTypes.aget_assistants,
        CallTypes.acreate_assistants,
        CallTypes.adelete_assistant,
        CallTypes.acreate_thread,
        CallTypes.aget_thread,
        CallTypes.a_add_message,
        CallTypes.aget_messages,
        CallTypes.arun_thread,
    ):
        patch_target = (
            "litellm.assistants.main.azure_assistants_api.initialize_azure_sdk_client"
        )
    elif call_type in (CallTypes.acreate_file, CallTypes.afile_content):
        patch_target = (
            "litellm.files.main.azure_files_instance.initialize_azure_sdk_client"
        )
    else:
        patch_target = (
            "litellm.llms.azure.common_utils.BaseAzureLLM.initialize_azure_sdk_client"
        )

    # Mock the initialize_azure_sdk_client function
    with patch(patch_target) as mock_init_azure:
        try:
            # Fetch the router method once; skip call types Router lacks.
            # (pytest.skip raises an outcome exception that is not caught by
            # `except Exception` below.)
            router_method = getattr(router, call_type.value, None)
            if router_method is None:
                pytest.skip(
                    f"Skipping {call_type.value} because it is not supported on Router"
                )
            await router_method(
                model="gpt-3.5-turbo",
                **input_kwarg,
                num_retries=0,
                azure_ad_token="oidc/test-token",
            )
        except Exception:
            # The downstream SDK call is expected to fail under mocking; the
            # assertions below only check that initialization happened.
            traceback.print_exc()

        # initialize_azure_sdk_client must have been invoked exactly once...
        mock_init_azure.assert_called_once()
        calls = mock_init_azure.call_args_list
        # ...with the azure_ad_token from litellm_params forwarded intact.
        litellm_params = calls[0].kwargs["litellm_params"]
        print("litellm_params", litellm_params)
        assert (
            "azure_ad_token" in litellm_params
        ), "azure_ad_token not found in parameters"
        assert (
            litellm_params["azure_ad_token"] == "oidc/test-token"
        ), "azure_ad_token is not correct"
        # Credentials must always be part of the initialization call.
        for call in calls:
            assert "api_key" in call.kwargs, "api_key not found in parameters"
            assert "api_base" in call.kwargs, "api_base not found in parameters"
@pytest.mark.parametrize(
    "call_type",
    [
        CallTypes.atext_completion,
        CallTypes.acompletion,
    ],
)
@pytest.mark.asyncio
async def test_ensure_initialize_azure_sdk_client_always_used_azure_text(call_type):
    """`azure_text/` models must also route through initialize_azure_sdk_client
    (via the dedicated azure_text_completions handler), forwarding
    litellm_params such as azure_ad_token.
    """
    from litellm.router import Router

    # Router with a single azure_text deployment behind the alias.
    azure_model_name = "azure_text/chatgpt-v-2"
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": azure_model_name,
                    "api_key": "test-api-key",
                    "api_version": os.getenv("AZURE_API_VERSION", "2023-05-15"),
                    "api_base": os.getenv(
                        "AZURE_API_BASE", "https://test.openai.azure.com"
                    ),
                },
            }
        ],
    )
    # Minimal per-call-type keyword arguments.
    test_inputs = {
        "acompletion": {
            "messages": [{"role": "user", "content": "Hello, how are you?"}]
        },
        "atext_completion": {"prompt": "Hello, how are you?"},
    }
    input_kwarg = test_inputs.get(call_type.value, {})

    # azure_text calls always go through the text-completions handler.
    patch_target = "litellm.main.azure_text_completions.initialize_azure_sdk_client"

    # Mock the initialize_azure_sdk_client function
    with patch(patch_target) as mock_init_azure:
        try:
            # Fetch the router method once; skip call types Router lacks.
            router_method = getattr(router, call_type.value, None)
            if router_method is None:
                pytest.skip(
                    f"Skipping {call_type.value} because it is not supported on Router"
                )
            await router_method(
                model="gpt-3.5-turbo",
                **input_kwarg,
                num_retries=0,
                azure_ad_token="oidc/test-token",
            )
        except Exception:
            # The downstream SDK call is expected to fail under mocking; the
            # assertions below only check that initialization happened.
            traceback.print_exc()

        # initialize_azure_sdk_client must have been invoked exactly once...
        mock_init_azure.assert_called_once()
        calls = mock_init_azure.call_args_list
        # ...with the azure_ad_token from litellm_params forwarded intact.
        litellm_params = calls[0].kwargs["litellm_params"]
        print("litellm_params", litellm_params)
        assert (
            "azure_ad_token" in litellm_params
        ), "azure_ad_token not found in parameters"
        assert (
            litellm_params["azure_ad_token"] == "oidc/test-token"
        ), "azure_ad_token is not correct"
        # Credentials must always be part of the initialization call.
        for call in calls:
            assert "api_key" in call.kwargs, "api_key not found in parameters"
            assert "api_base" in call.kwargs, "api_base not found in parameters"
# Test parameters for different API functions with Azure models.
# Each entry parametrizes one run of test_azure_client_reuse below; the same
# args dict is passed 10 times to verify the Azure client is built only once.
AZURE_API_FUNCTION_PARAMS = [
    # (function_name, is_async, args)
    # chat completion — sync
    (
        "completion",
        False,
        {
            "model": "azure/gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # chat completion — async, streaming
    (
        "completion",
        True,
        {
            "model": "azure/gpt-4",
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "stream": True,
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # embedding — sync
    (
        "embedding",
        False,
        {
            "model": "azure/text-embedding-ada-002",
            "input": "Hello world",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # embedding — async
    (
        "embedding",
        True,
        {
            "model": "azure/text-embedding-ada-002",
            "input": "Hello world",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # text-to-speech — sync
    (
        "speech",
        False,
        {
            "model": "azure/tts-1",
            "input": "Hello, this is a test of text to speech",
            "voice": "alloy",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # text-to-speech — async
    (
        "speech",
        True,
        {
            "model": "azure/tts-1",
            "input": "Hello, this is a test of text to speech",
            "voice": "alloy",
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # transcription — sync (file is a stand-in mock, never actually read)
    (
        "transcription",
        False,
        {
            "model": "azure/whisper-1",
            "file": MagicMock(),
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
    # transcription — async
    (
        "transcription",
        True,
        {
            "model": "azure/whisper-1",
            "file": MagicMock(),
            "api_key": "test-api-key",
            "api_base": "https://test.openai.azure.com",
            "api_version": "2023-05-15",
        },
    ),
]
@pytest.mark.parametrize("function_name,is_async,args", AZURE_API_FUNCTION_PARAMS)
@pytest.mark.asyncio
async def test_azure_client_reuse(function_name, is_async, args):
    """
    Test that multiple Azure API calls reuse the same Azure OpenAI client
    """
    litellm.set_verbose = True
    # Pick the client class matching the sync/async flavor under test.
    target_client_path = (
        "litellm.llms.azure.common_utils.AsyncAzureOpenAI"
        if is_async
        else "litellm.llms.azure.common_utils.AzureOpenAI"
    )
    # Stand-in client instance handed out by the patched class.
    fake_client = MagicMock()
    with patch(target_client_path) as patched_client_cls, patch.object(
        BaseAzureLLM, "set_cached_openai_client"
    ) as cache_set_mock, patch.object(
        BaseAzureLLM, "get_cached_openai_client"
    ) as cache_get_mock, patch.object(
        BaseAzureLLM, "initialize_azure_sdk_client"
    ) as init_sdk_mock:
        patched_client_cls.return_value = fake_client
        # First lookup misses the cache; the following nine hit it.
        cache_get_mock.side_effect = [None, *([fake_client] * 9)]
        # Hand back just enough client params for construction to proceed.
        init_sdk_mock.return_value = {
            "api_key": args.get("api_key"),
            "azure_endpoint": args.get("api_base"),
            "api_version": args.get("api_version"),
            "azure_ad_token": None,
            "azure_ad_token_provider": None,
        }
        # Resolve the litellm entrypoint once ('a' prefix for async variants).
        entrypoint_name = f"a{function_name}" if is_async else function_name
        api_fn = getattr(litellm, entrypoint_name)
        # Issue 10 identical API calls.
        for _attempt in range(10):
            try:
                if is_async:
                    await api_fn(**args)
                else:
                    api_fn(**args)
            except Exception:
                # Exceptions are expected: the client is a bare MagicMock.
                pass
        # One construction, one initialization, one cache write, ten reads.
        assert (
            patched_client_cls.call_count == 1
        ), f"{'Async' if is_async else ''}AzureOpenAI client should be created only once"
        assert (
            init_sdk_mock.call_count == 1
        ), "initialize_azure_sdk_client should be called once"
        assert cache_set_mock.call_count == 1, "Client should be cached once"
        assert cache_get_mock.call_count == 10, "Should check cache for each request"