(fix) Azure AI Studio - using image_url in content with both text and image_url (#6774)

* use helper _audio_or_image_in_message_content
* update azure ai transformation
* test_azure_ai_with_image_url

parent 0c7360d470
commit 160357d54c

3 changed files with 128 additions and 3 deletions
@@ -3,7 +3,10 @@ from typing import List, Optional, Tuple
 import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.OpenAI.openai import OpenAIConfig
-from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str
+from litellm.llms.prompt_templates.common_utils import (
+    _audio_or_image_in_message_content,
+    convert_content_list_to_str,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ProviderField
@@ -27,8 +30,21 @@ class AzureAIStudioConfig(OpenAIConfig):
             ),
         ]

-    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
+    def _transform_messages(
+        self,
+        messages: List[AllMessageValues],
+    ) -> List:
+        """
+        - Azure AI Studio doesn't support content as a list. This handles:
+            1. Transforms list content to a string.
+            2. If message contains an image or audio, send as is (user-intended)
+        """
         for message in messages:
+
+            # Do nothing if the message contains an image or audio
+            if _audio_or_image_in_message_content(message):
+                continue
+
             texts = convert_content_list_to_str(message=message)
             if texts:
                 message["content"] = texts
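
For orientation, here is a minimal standalone sketch of what the reworked _transform_messages loop does. The helper bodies below are simplified stand-ins that mirror the diff, not the actual litellm source:

# Sketch only: simplified stand-ins for litellm's helpers, for illustration.
from typing import Any, Dict, List

def _audio_or_image_in_message_content(message: Dict[str, Any]) -> bool:
    # True if the content list carries an image_url or input_audio part
    content = message.get("content")
    if isinstance(content, list):
        return any(c.get("type") in ("image_url", "input_audio") for c in content)
    return False

def convert_content_list_to_str(message: Dict[str, Any]) -> str:
    # Concatenate the text parts of a content list into one string
    content = message.get("content")
    if isinstance(content, str):
        return content
    texts = ""
    if isinstance(content, list):
        for c in content:
            if c.get("type") == "text":
                texts += c.get("text", "")
    return texts

messages: List[Dict[str, Any]] = [
    {"role": "user", "content": [{"type": "text", "text": "hi"}]},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/x.png"}},
        ],
    },
]

for message in messages:
    if _audio_or_image_in_message_content(message):
        continue  # image/audio present: leave the content list untouched
    texts = convert_content_list_to_str(message=message)
    if texts:
        message["content"] = texts

print(messages[0]["content"])        # 'hi' -- flattened to a plain string
print(type(messages[1]["content"]))  # <class 'list'> -- passed through as-is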
@@ -41,7 +41,6 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
     """
     - handles scenario where content is list and not string
     - content list is just text, and no images
-    - if image passed in, then just return as is (user-intended)

     Motivation: mistral api + azure ai don't support content as a list
     """
@@ -59,6 +58,19 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
     return texts


+def _audio_or_image_in_message_content(message: AllMessageValues) -> bool:
+    """
+    Checks if message content contains an image or audio
+    """
+    message_content = message.get("content")
+    if message_content:
+        if message_content is not None and isinstance(message_content, list):
+            for c in message_content:
+                if c.get("type") == "image_url" or c.get("type") == "input_audio":
+                    return True
+    return False
+
+
 def convert_openai_message_to_only_content_messages(
     messages: List[AllMessageValues],
 ) -> List[Dict[str, str]]:
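
A few illustrative checks of the new helper (message shapes follow the OpenAI chat format; the URL and text values here are made up):

# Import path taken from the diff above; assumes a litellm checkout at this commit.
from litellm.llms.prompt_templates.common_utils import (
    _audio_or_image_in_message_content,
)

text_only = {"role": "user", "content": [{"type": "text", "text": "hi"}]}
with_image = {
    "role": "user",
    "content": [
        {"type": "text", "text": "describe this"},
        {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
    ],
}
plain_string = {"role": "user", "content": "hi"}

assert _audio_or_image_in_message_content(with_image)
assert not _audio_or_image_in_message_content(text_only)
assert not _audio_or_image_in_message_content(plain_string)  # string content, not a list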
@@ -11,6 +11,9 @@ from dotenv import load_dotenv
 import litellm.types
 import litellm.types.utils
 from litellm.llms.anthropic.chat import ModelResponseIterator
+import httpx
+import json
+from respx import MockRouter

 load_dotenv()
 import io
@@ -39,3 +42,97 @@ def test_map_azure_model_group(model_group_header, expected_model):

     config = AzureAICohereConfig()
     assert config._map_azure_model_group(model_group_header) == expected_model
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_azure_ai_with_image_url(respx_mock: MockRouter):
+    """
+    Important test:
+
+    Test that Azure AI studio can handle image_url passed when content is a list containing both text and image_url
+    """
+    litellm.set_verbose = True
+
+    # Mock response based on the actual API response
+    mock_response = {
+        "id": "cmpl-53860ea1efa24d2883555bfec13d2254",
+        "choices": [
+            {
+                "finish_reason": "stop",
+                "index": 0,
+                "logprobs": None,
+                "message": {
+                    "content": "The image displays a graphic with the text 'LiteLLM' in black",
+                    "role": "assistant",
+                    "refusal": None,
+                    "audio": None,
+                    "function_call": None,
+                    "tool_calls": None,
+                },
+            }
+        ],
+        "created": 1731801937,
+        "model": "phi35-vision-instruct",
+        "object": "chat.completion",
+        "usage": {
+            "completion_tokens": 69,
+            "prompt_tokens": 617,
+            "total_tokens": 686,
+            "completion_tokens_details": None,
+            "prompt_tokens_details": None,
+        },
+    }
+
+    # Mock the API request
+    mock_request = respx_mock.post(
+        "https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com"
+    ).mock(return_value=httpx.Response(200, json=mock_response))
+
+    response = await litellm.acompletion(
+        model="azure_ai/Phi-3-5-vision-instruct-dcvov",
+        api_base="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What is in this image?",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+                        },
+                    },
+                ],
+            },
+        ],
+        api_key="fake-api-key",
+    )
+
+    # Verify the request was made
+    assert mock_request.called
+
+    # Check the request body
+    request_body = json.loads(mock_request.calls[0].request.content)
+    assert request_body == {
+        "model": "Phi-3-5-vision-instruct-dcvov",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+                        },
+                    },
+                ],
+            }
+        ],
+    }
+
+    print(f"response: {response}")
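
To exercise just this test locally, running pytest with keyword selection should work, e.g. pytest -k test_azure_ai_with_image_url (the test file's path isn't shown in this view); respx and pytest-asyncio need to be installed for the @pytest.mark.respx and @pytest.mark.asyncio markers to take effect.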