fix(factory.py): flexible handling of image file type in b64 encode

Fixes https://github.com/BerriAI/litellm/issues/4589
This commit is contained in:
Krrish Dholakia 2024-07-08 09:45:08 -07:00
parent dcf7410dde
commit 5c7107e44f
3 changed files with 57 additions and 7 deletions

View file

@@ -704,22 +704,19 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing
try:
if openai_image_url.startswith("http"):
openai_image_url = convert_url_to_base64(url=openai_image_url)
# Extract the base64 image data
base64_data = openai_image_url.split("data:image/")[1].split(";base64,")[1]
# Infer image format from the URL
image_format = openai_image_url.split("data:image/")[1].split(";base64,")[0]
# Extract the media type and base64 data
media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,")
return GenericImageParsingChunk(
type="base64",
media_type=f"image/{image_format}",
media_type=media_type,
data=base64_data,
)
except Exception as e:
if "Error: Unable to fetch image from URL" in str(e):
raise e
raise Exception(
"""Image url not in expected format. Example Expected input - "image_url": "data:image/jpeg;base64,{base64_image}". Supported formats - ['image/jpeg', 'image/png', 'image/gif', 'image/webp'] """
"""Image url not in expected format. Example Expected input - "image_url": "data:image/jpeg;base64,{base64_image}". Supported formats - ['image/jpeg', 'image/png', 'image/gif', 'image/webp']."""
)

View file

@@ -848,6 +848,45 @@ def test_completion_function_plus_image(model):
print(response)
@pytest.mark.parametrize(
    "model", ["gemini/gemini-1.5-pro"]  # "claude-3-sonnet-20240229",
)
def test_completion_function_plus_pdf(model):
    """Verify that a base64-encoded PDF passed via an `image_url` data URL is
    accepted by `completion` for models that support document input.

    Downloads a sample PDF, base64-encodes it into a
    `data:application/pdf;base64,...` URL, and sends it alongside a text
    prompt. `litellm.InternalServerError` is tolerated because the upstream
    provider may transiently fail; any other exception fails the test.
    """
    litellm.set_verbose = True
    try:
        import base64

        import requests

        # URL of the sample file (Gemini 1.5 technical report PDF)
        url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"

        # Download the file. A timeout prevents the test from hanging
        # indefinitely on a stalled connection, and raise_for_status()
        # catches a 404/5xx body before it is silently base64-encoded
        # and sent to the model as the "PDF".
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        file_data = response.content

        encoded_file = base64.b64encode(file_data).decode("utf-8")

        image_content = [
            {"type": "text", "text": "What's this file about?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:application/pdf;base64,{encoded_file}"},
            },
        ]
        image_message = {"role": "user", "content": image_content}

        response = completion(
            model=model,
            messages=[image_message],
            stream=False,
        )
        print(response)
    except litellm.InternalServerError:
        # Provider-side transient failure — acceptable for this test.
        pass
@pytest.mark.parametrize(
"provider",
["azure", "azure_ai"],

View file

@@ -15,6 +15,7 @@ from litellm.llms.prompt_templates.factory import (
anthropic_messages_pt,
anthropic_pt,
claude_2_1_pt,
convert_to_anthropic_image_obj,
convert_url_to_base64,
llama_2_chat_pt,
prompt_factory,
@@ -163,3 +164,16 @@ def test_convert_url_to_img():
)
assert "image/jpeg" in response_url
@pytest.mark.parametrize(
    "url, expected_media_type",
    [
        # standard image data URL
        ("data:image/jpeg;base64,1234", "image/jpeg"),
        # non-image media type must be passed through verbatim
        ("data:application/pdf;base64,1234", "application/pdf"),
    ],
)
def test_base64_image_input(url, expected_media_type):
    """The media type of a data URL is extracted as-is, image or not."""
    parsed_chunk = convert_to_anthropic_image_obj(openai_image_url=url)

    assert parsed_chunk["media_type"] == expected_media_type