(fix) Azure AI Studio - using image_url in content with both text and image_url (#6774)

* use helper _audio_or_image_in_message_content
* update azure ai transformation
* test_azure_ai_with_image_url

parent 0c7360d470
commit 160357d54c

3 changed files with 128 additions and 3 deletions
@@ -3,7 +3,10 @@ from typing import List, Optional, Tuple
 import litellm
 from litellm._logging import verbose_logger
 from litellm.llms.OpenAI.openai import OpenAIConfig
-from litellm.llms.prompt_templates.common_utils import convert_content_list_to_str
+from litellm.llms.prompt_templates.common_utils import (
+    _audio_or_image_in_message_content,
+    convert_content_list_to_str,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ProviderField
@@ -27,8 +30,21 @@ class AzureAIStudioConfig(OpenAIConfig):
             ),
         ]

-    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
+    def _transform_messages(
+        self,
+        messages: List[AllMessageValues],
+    ) -> List:
+        """
+        - Azure AI Studio doesn't support content as a list. This handles:
+            1. Transforms list content to a string.
+            2. If message contains an image or audio, send as is (user-intended)
+        """
         for message in messages:
+
+            # Do nothing if the message contains an image or audio
+            if _audio_or_image_in_message_content(message):
+                continue
+
             texts = convert_content_list_to_str(message=message)
             if texts:
                 message["content"] = texts
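
For orientation, here is a minimal standalone sketch of what the reworked _transform_messages loop does. The helper bodies below are simplified stand-ins that mirror the diff, not the actual litellm source:

# Sketch only: simplified stand-ins for litellm's helpers, for illustration.
from typing import Any, Dict, List

def _audio_or_image_in_message_content(message: Dict[str, Any]) -> bool:
    # True if the content list carries an image_url or input_audio part
    content = message.get("content")
    if isinstance(content, list):
        return any(c.get("type") in ("image_url", "input_audio") for c in content)
    return False

def convert_content_list_to_str(message: Dict[str, Any]) -> str:
    # Concatenate the text parts of a content list into one string
    content = message.get("content")
    if isinstance(content, str):
        return content
    texts = ""
    if isinstance(content, list):
        for c in content:
            if c.get("type") == "text":
                texts += c.get("text", "")
    return texts

messages: List[Dict[str, Any]] = [
    {"role": "user", "content": [{"type": "text", "text": "hi"}]},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/x.png"}},
        ],
    },
]

for message in messages:
    if _audio_or_image_in_message_content(message):
        continue  # image/audio present: leave the content list untouched
    texts = convert_content_list_to_str(message=message)
    if texts:
        message["content"] = texts

print(messages[0]["content"])        # 'hi' -- flattened to a plain string
print(type(messages[1]["content"]))  # <class 'list'> -- passed through as-is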
@@ -41,7 +41,6 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
     """
     - handles scenario where content is list and not string
     - content list is just text, and no images
-    - if image passed in, then just return as is (user-intended)

     Motivation: mistral api + azure ai don't support content as a list
     """
@@ -59,6 +58,19 @@ def convert_content_list_to_str(message: AllMessageValues) -> str:
     return texts


+def _audio_or_image_in_message_content(message: AllMessageValues) -> bool:
+    """
+    Checks if message content contains an image or audio
+    """
+    message_content = message.get("content")
+    if message_content:
+        if message_content is not None and isinstance(message_content, list):
+            for c in message_content:
+                if c.get("type") == "image_url" or c.get("type") == "input_audio":
+                    return True
+    return False
+
+
 def convert_openai_message_to_only_content_messages(
     messages: List[AllMessageValues],
 ) -> List[Dict[str, str]]:
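
A few illustrative checks of the new helper (message shapes follow the OpenAI chat format; the URL and text values here are made up):

# Import path taken from the diff above; assumes a litellm checkout at this commit.
from litellm.llms.prompt_templates.common_utils import (
    _audio_or_image_in_message_content,
)

text_only = {"role": "user", "content": [{"type": "text", "text": "hi"}]}
with_image = {
    "role": "user",
    "content": [
        {"type": "text", "text": "describe this"},
        {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
    ],
}
plain_string = {"role": "user", "content": "hi"}

assert _audio_or_image_in_message_content(with_image)
assert not _audio_or_image_in_message_content(text_only)
assert not _audio_or_image_in_message_content(plain_string)  # string content, not a list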
@@ -11,6 +11,9 @@ from dotenv import load_dotenv
 import litellm.types
 import litellm.types.utils
 from litellm.llms.anthropic.chat import ModelResponseIterator
+import httpx
+import json
+from respx import MockRouter

 load_dotenv()
 import io
@@ -39,3 +42,97 @@ def test_map_azure_model_group(model_group_header, expected_model):

     config = AzureAICohereConfig()
     assert config._map_azure_model_group(model_group_header) == expected_model
+
+
+@pytest.mark.asyncio
+@pytest.mark.respx
+async def test_azure_ai_with_image_url(respx_mock: MockRouter):
+    """
+    Important test:
+
+    Test that Azure AI studio can handle image_url passed when content is a list containing both text and image_url
+    """
+    litellm.set_verbose = True
+
+    # Mock response based on the actual API response
+    mock_response = {
+        "id": "cmpl-53860ea1efa24d2883555bfec13d2254",
+        "choices": [
+            {
+                "finish_reason": "stop",
+                "index": 0,
+                "logprobs": None,
+                "message": {
+                    "content": "The image displays a graphic with the text 'LiteLLM' in black",
+                    "role": "assistant",
+                    "refusal": None,
+                    "audio": None,
+                    "function_call": None,
+                    "tool_calls": None,
+                },
+            }
+        ],
+        "created": 1731801937,
+        "model": "phi35-vision-instruct",
+        "object": "chat.completion",
+        "usage": {
+            "completion_tokens": 69,
+            "prompt_tokens": 617,
+            "total_tokens": 686,
+            "completion_tokens_details": None,
+            "prompt_tokens_details": None,
+        },
+    }
+
+    # Mock the API request
+    mock_request = respx_mock.post(
+        "https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com"
+    ).mock(return_value=httpx.Response(200, json=mock_response))
+
+    response = await litellm.acompletion(
+        model="azure_ai/Phi-3-5-vision-instruct-dcvov",
+        api_base="https://Phi-3-5-vision-instruct-dcvov.eastus2.models.ai.azure.com",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "What is in this image?",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+                        },
+                    },
+                ],
+            },
+        ],
+        api_key="fake-api-key",
+    )
+
+    # Verify the request was made
+    assert mock_request.called
+
+    # Check the request body
+    request_body = json.loads(mock_request.calls[0].request.content)
+    assert request_body == {
+        "model": "Phi-3-5-vision-instruct-dcvov",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "https://litellm-listing.s3.amazonaws.com/litellm_logo.png"
+                        },
+                    },
+                ],
+            }
+        ],
+    }
+
+    print(f"response: {response}")
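
To exercise just this test locally, running pytest with keyword selection should work, e.g. pytest -k test_azure_ai_with_image_url (the test file's path isn't shown in this view); respx and pytest-asyncio need to be installed for the @pytest.mark.respx and @pytest.mark.asyncio markers to take effect.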