(fix) Azure AI Studio - using image_url in content with both text and image_url (#6774)

* use helper _audio_or_image_in_message_content

* update azure ai transformation

* test_azure_ai_with_image_url
This commit is contained in:
Ishaan Jaff 2024-11-16 20:05:24 -08:00 committed by GitHub
parent 0c7360d470
commit 160357d54c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 128 additions and 3 deletions

View file

@ -3,7 +3,10 @@ from typing import List, Optional, Tuple
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
from litellm.llms.OpenAI.openai import OpenAIConfig from litellm.llms.OpenAI.openai import OpenAIConfig
from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str from litellm.llms.prompt_templates.common_utils import (
_audio_or_image_in_message_content,
convert_content_list_to_str,
)
from litellm.secret_managers.main import get_secret_str from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ProviderField from litellm.types.utils import ProviderField
@ -27,8 +30,21 @@ class AzureAIStudioConfig(OpenAIConfig):
), ),
] ]
def _transform_messages(self, messages: List[AllMessageValues]) -> List: def _transform_messages(
self,
messages: List[AllMessageValues],
) -> List:
"""
- Azure AI Studio doesn't support content as a list. This handles:
1. Transforms list content to a string.
2. If message contains an image or audio, send as is (user-intended)
"""
for message in messages: for message in messages:
# Do nothing if the message contains an image or audio
if _audio_or_image_in_message_content(message):
continue
texts = convert_content_list_to_str(message=message) texts = convert_content_list_to_str(message=message)
if texts: if texts:
message["content"] = texts message["content"] = texts

View file

@ -41,7 +41,6 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
""" """
- handles scenario where content is list and not string - handles scenario where content is list and not string
- content list is just text, and no images - content list is just text, and no images
- if image passed in, then just return as is (user-intended)
Motivation: mistral api + azure ai don't support content as a list Motivation: mistral api + azure ai don't support content as a list
""" """
@ -59,6 +58,19 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
return texts return texts
def _audio_or_image_in_message_content(message: AllMessageValues) -> bool:
"""
Checks if message content contains an image or audio
"""
message_content = message.get("content")
if message_content:
if message_content is not None and isinstance(message_content, list):
for c in message_content:
if c.get("type") == "image_url" or c.get("type") == "input_audio":
return True
return False
def convert_openai_message_to_only_content_messages( def convert_openai_message_to_only_content_messages(
messages: List[AllMessageValues], messages: List[AllMessageValues],
) -> List[Dict[str, str]]: ) -> List[Dict[str, str]]:

View file

@ -11,6 +11,9 @@ from dotenv import load_dotenv
import litellm.types import litellm.types
import litellm.types.utils import litellm.types.utils
from litellm.llms.anthropic.chat import ModelResponseIterator from litellm.llms.anthropic.chat import ModelResponseIterator
import httpx
import json
from respx import MockRouter
load_dotenv() load_dotenv()
import io import io
@ -39,3 +42,97 @@ def test_map_azure_model_group(model_group_header, expected_model):
config = AzureAICohereConfig() config = AzureAICohereConfig()
assert config._map_azure_model_group(model_group_header) == expected_model assert config._map_azure_model_group(model_group_header) == expected_model
@pytest.mark.asyncio
@pytest.mark.respx
async def test_azure_ai_with_image_url(respx_mock: MockRouter):
    """
    Important test:

    Sends a user message whose content is a list with both a text part and an
    image_url part to a mocked Azure AI Studio endpoint, then checks that the
    outgoing request body still carries that content list unchanged.
    """
    litellm.set_verbose = True

    endpoint = "https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com"

    # Canned completion, shaped like a real API response
    assistant_message = {
        "content": "The image displays a graphic with the text 'LiteLLM' in black",
        "role": "assistant",
        "refusal": None,
        "audio": None,
        "function_call": None,
        "tool_calls": None,
    }
    token_usage = {
        "completion_tokens": 69,
        "prompt_tokens": 617,
        "total_tokens": 686,
        "completion_tokens_details": None,
        "prompt_tokens_details": None,
    }
    canned_completion = {
        "id": "cmpl-53860ea1efa24d2883555bfec13d2254",
        "choices": [
            {
                "finish_reason": "stop",
                "index": 0,
                "logprobs": None,
                "message": assistant_message,
            }
        ],
        "created": 1731801937,
        "model": "phi35-vision-instruct",
        "object": "chat.completion",
        "usage": token_usage,
    }

    # Intercept POSTs to the endpoint and answer with the canned completion
    chat_route = respx_mock.post(endpoint).mock(
        return_value=httpx.Response(200, json=canned_completion)
    )

    response = await litellm.acompletion(
        model="azure_ai/Phi-3-5-vision-instruct-dcvov",
        api_base=endpoint,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What is in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
                        },
                    },
                ],
            },
        ],
        api_key="fake-api-key",
    )

    # The mocked route must have been hit
    assert chat_route.called

    # The payload on the wire must keep the text + image_url list intact.
    # It is spelled out as a separate literal so the check does not depend on
    # the input objects passed to acompletion.
    expected_payload = {
        "model": "Phi-3-5-vision-instruct-dcvov",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
                        },
                    },
                ],
            }
        ],
    }
    sent_payload = json.loads(chat_route.calls[0].request.content)
    assert sent_payload == expected_payload

    print(f"response: {response}")