From 160357d54cbdb7050a536f8c18f32315ad42e155 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 16 Nov 2024 20:05:24 -0800 Subject: [PATCH] (fix) Azure AI Studio - using `image_url` in content with both text and image_url (#6774) * use helper _audio_or_image_in_message_content * update azure ai transf * test_azure_ai_with_image_url --- litellm/llms/azure_ai/chat/transformation.py | 20 +++- litellm/llms/prompt_templates/common_utils.py | 14 ++- tests/llm_translation/test_azure_ai.py | 97 +++++++++++++++++++ 3 files changed, 128 insertions(+), 3 deletions(-) diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py index 9767282fb..d8924fbb9 100644 --- a/litellm/llms/azure_ai/chat/transformation.py +++ b/litellm/llms/azure_ai/chat/transformation.py @@ -3,7 +3,10 @@ from typing import List, Optional, Tuple import litellm from litellm._logging import verbose_logger from litellm.llms.OpenAI.openai import OpenAIConfig -from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str +from litellm.llms.prompt_templates.common_utils import ( + _audio_or_image_in_message_content, + convert_content_list_to_str, +) from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ProviderField @@ -27,8 +30,21 @@ class AzureAIStudioConfig(OpenAIConfig): ), ] - def _transform_messages(self, messages: List[AllMessageValues]) -> List: + def _transform_messages( + self, + messages: List[AllMessageValues], + ) -> List: + """ + - Azure AI Studio doesn't support content as a list. This handles: + 1. Transforms list content to a string. + 2. If message contains an image or audio, send as is (user-intended) + """ for message in messages: + + # Do nothing if the message contains an image or audio + if _audio_or_image_in_message_content(message): + continue + texts = convert_content_list_to_str(message=message) if texts: message["content"] = texts diff --git a/litellm/llms/prompt_templates/common_utils.py b/litellm/llms/prompt_templates/common_utils.py index a91ec2170..24cb7b451 100644 --- a/litellm/llms/prompt_templates/common_utils.py +++ b/litellm/llms/prompt_templates/common_utils.py @@ -41,7 +41,6 @@ def convert_content_list_to_str(message: AllMessageValues) -> str: """ - handles scenario where content is list and not string - content list is just text, and no images - - if image passed in, then just return as is (user-intended) Motivation: mistral api + azure ai don't support content as a list """ @@ -59,6 +58,19 @@ def convert_content_list_to_str(message: AllMessageValues) -> str: return texts +def _audio_or_image_in_message_content(message: AllMessageValues) -> bool: + """ + Checks if message content contains an image or audio + """ + message_content = message.get("content") + if message_content: + if message_content is not None and isinstance(message_content, list): + for c in message_content: + if c.get("type") == "image_url" or c.get("type") == "input_audio": + return True + return False + + def convert_openai_message_to_only_content_messages( messages: List[AllMessageValues], ) -> List[Dict[str, str]]: diff --git a/tests/llm_translation/test_azure_ai.py b/tests/llm_translation/test_azure_ai.py index 78e719c52..944e20148 100644 --- a/tests/llm_translation/test_azure_ai.py +++ b/tests/llm_translation/test_azure_ai.py @@ -11,6 +11,9 @@ from dotenv import load_dotenv import litellm.types import litellm.types.utils from litellm.llms.anthropic.chat import ModelResponseIterator +import httpx +import json +from respx import MockRouter load_dotenv() import io @@ -39,3 +42,97 @@ def test_map_azure_model_group(model_group_header, expected_model): config = AzureAICohereConfig() assert config._map_azure_model_group(model_group_header) == expected_model + + +@pytest.mark.asyncio +@pytest.mark.respx +async def test_azure_ai_with_image_url(respx_mock: MockRouter): + """ + Important test: + + Test that Azure AI studio can handle image_url passed when content is a list containing both text and image_url + """ + litellm.set_verbose = True + + # Mock response based on the actual API response + mock_response = { + "id": "cmpl-53860ea1efa24d2883555bfec13d2254", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": None, + "message": { + "content": "The image displays a graphic with the text 'LiteLLM' in black", + "role": "assistant", + "refusal": None, + "audio": None, + "function_call": None, + "tool_calls": None, + }, + } + ], + "created": 1731801937, + "model": "phi35-vision-instruct", + "object": "chat.completion", + "usage": { + "completion_tokens": 69, + "prompt_tokens": 617, + "total_tokens": 686, + "completion_tokens_details": None, + "prompt_tokens_details": None, + }, + } + + # Mock the API request + mock_request = respx_mock.post( + "https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com" + ).mock(return_value=httpx.Response(200, json=mock_response)) + + response = await litellm.acompletion( + model="azure_ai/Phi-3-5-vision-instruct-dcvov", + api_base="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this image?", + }, + { + "type": "image_url", + "image_url": { + "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" + }, + }, + ], + }, + ], + api_key="fake-api-key", + ) + + # Verify the request was made + assert mock_request.called + + # Check the request body + request_body = json.loads(mock_request.calls[0].request.content) + assert request_body == { + "model": "Phi-3-5-vision-instruct-dcvov", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What is in this image?"}, + { + "type": "image_url", + "image_url": { + "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png" + }, + }, + ], + } + ], + } + + print(f"response: {response}")