From 96b0e324e306c78611cdb4add6f4134fc7a2686c Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 5 Nov 2024 11:25:09 +0530
Subject: [PATCH] (fix) Vertex Improve Performance when using `image_url` 
 (#6593)

* fix transformation vertex

* test test_process_gemini_image

* test_image_completion_request

* testing fix - bedrock has deprecated cohere.command-text-v14

* fix vertex pdf
---
 .../gemini/transformation.py                  |  37 ++++-
 tests/llm_translation/test_vertex.py          | 146 ++++++++++++++++++
 2 files changed, 180 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py
index 66ab07674..f828d93c8 100644
--- a/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py
+++ b/litellm/llms/vertex_ai_and_google_ai_studio/gemini/transformation.py
@@ -51,6 +51,9 @@ from ..common_utils import (
 
 
 def _process_gemini_image(image_url: str) -> PartType:
+    """
+    Given an image URL, return the appropriate PartType for Gemini
+    """
     try:
         # GCS URIs
         if "gs://" in image_url:
@@ -68,9 +71,14 @@ def _process_gemini_image(image_url: str) -> PartType:
             file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
 
             return PartType(file_data=file_data)
-
-        # Direct links
-        elif "https:/" in image_url or "base64" in image_url:
+        elif (
+            "https://" in image_url
+            and (image_type := _get_image_mime_type_from_url(image_url)) is not None
+        ):
+            file_data = FileDataType(file_uri=image_url, mime_type=image_type)
+            return PartType(file_data=file_data)
+        elif "https://" in image_url or "base64" in image_url:
+            # https links for unsupported mime types and base64 images
             image = convert_to_anthropic_image_obj(image_url)
             _blob = BlobType(data=image["data"], mime_type=image["media_type"])
             return PartType(inline_data=_blob)
@@ -79,6 +87,29 @@ def _process_gemini_image(image_url: str) -> PartType:
         raise e
 
 
+def _get_image_mime_type_from_url(url: str) -> Optional[str]:
+    """
+    Get mime type for common image URLs
+    See gemini mime types: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/image-understanding#image-requirements
+
+    Supported by Gemini:
+     - PNG (`image/png`)
+     - JPEG (`image/jpeg`)
+     - WebP (`image/webp`)
+    Example:
+        url = https://example.com/image.jpg
+        Returns: image/jpeg
+    """
+    url = url.lower()
+    if url.endswith((".jpg", ".jpeg")):
+        return "image/jpeg"
+    elif url.endswith(".png"):
+        return "image/png"
+    elif url.endswith(".webp"):
+        return "image/webp"
+    return None
+
+
 def _gemini_convert_messages_with_history(  # noqa: PLR0915
     messages: List[AllMessageValues],
 ) -> List[ContentType]:
diff --git a/tests/llm_translation/test_vertex.py b/tests/llm_translation/test_vertex.py
index 467be4ddf..a06179a49 100644
--- a/tests/llm_translation/test_vertex.py
+++ b/tests/llm_translation/test_vertex.py
@@ -15,6 +15,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import get_optional_params
+from litellm.llms.custom_httpx.http_handler import HTTPHandler
 
 
 def test_completion_pydantic_obj_2():
@@ -1171,3 +1172,148 @@ def test_logprobs():
         print(resp)
 
         assert resp.choices[0].logprobs is not None
+
+
+def test_process_gemini_image():
+    """Test the _process_gemini_image function for different image sources"""
+    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.transformation import (
+        _process_gemini_image,
+    )
+    from litellm.types.llms.vertex_ai import PartType, FileDataType, BlobType
+
+    # Test GCS URI
+    gcs_result = _process_gemini_image("gs://bucket/image.png")
+    assert gcs_result["file_data"] == FileDataType(
+        mime_type="image/png", file_uri="gs://bucket/image.png"
+    )
+
+    # Test HTTPS JPG URL
+    https_result = _process_gemini_image("https://example.com/image.jpg")
+    print("https_result JPG", https_result)
+    assert https_result["file_data"] == FileDataType(
+        mime_type="image/jpeg", file_uri="https://example.com/image.jpg"
+    )
+
+    # Test HTTPS PNG URL
+    https_result = _process_gemini_image("https://example.com/image.png")
+    print("https_result PNG", https_result)
+    assert https_result["file_data"] == FileDataType(
+        mime_type="image/png", file_uri="https://example.com/image.png"
+    )
+
+    # Test base64 image
+    base64_image = "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
+    base64_result = _process_gemini_image(base64_image)
+    print("base64_result", base64_result)
+    assert base64_result["inline_data"]["mime_type"] == "image/jpeg"
+    assert base64_result["inline_data"]["data"] == "/9j/4AAQSkZJRg..."
+
+
+def test_get_image_mime_type_from_url():
+    """Test the _get_image_mime_type_from_url function for different image URLs"""
+    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.transformation import (
+        _get_image_mime_type_from_url,
+    )
+
+    # Test JPEG images
+    assert (
+        _get_image_mime_type_from_url("https://example.com/image.jpg") == "image/jpeg"
+    )
+    assert (
+        _get_image_mime_type_from_url("https://example.com/image.jpeg") == "image/jpeg"
+    )
+    assert (
+        _get_image_mime_type_from_url("https://example.com/IMAGE.JPG") == "image/jpeg"
+    )
+
+    # Test PNG images
+    assert _get_image_mime_type_from_url("https://example.com/image.png") == "image/png"
+    assert _get_image_mime_type_from_url("https://example.com/IMAGE.PNG") == "image/png"
+
+    # Test WebP images
+    assert (
+        _get_image_mime_type_from_url("https://example.com/image.webp") == "image/webp"
+    )
+    assert (
+        _get_image_mime_type_from_url("https://example.com/IMAGE.WEBP") == "image/webp"
+    )
+
+    # Test unsupported formats
+    assert _get_image_mime_type_from_url("https://example.com/image.gif") is None
+    assert _get_image_mime_type_from_url("https://example.com/image.bmp") is None
+    assert _get_image_mime_type_from_url("https://example.com/image") is None
+    assert _get_image_mime_type_from_url("invalid_url") is None
+
+
+@pytest.mark.parametrize(
+    "image_url", ["https://example.com/image.jpg", "https://example.com/image.png"]
+)
+def test_image_completion_request(image_url):
+    """https:// .jpg, .png images are passed directly to the model"""
+    from unittest.mock import patch, Mock
+    import litellm
+    from litellm.llms.vertex_ai_and_google_ai_studio.gemini.transformation import (
+        _get_image_mime_type_from_url,
+    )
+
+    # Mock response data
+    mock_response = Mock()
+    mock_response.json.return_value = {
+        "candidates": [{"content": {"parts": [{"text": "This is a sunflower"}]}}],
+        "usageMetadata": {
+            "promptTokenCount": 11,
+            "candidatesTokenCount": 50,
+            "totalTokenCount": 61,
+        },
+        "modelVersion": "gemini-1.5-pro",
+    }
+    mock_response.raise_for_status = MagicMock()
+    mock_response.status_code = 200
+
+    # Expected request body
+    expected_request_body = {
+        "contents": [
+            {
+                "role": "user",
+                "parts": [
+                    {"text": "Whats in this image?"},
+                    {
+                        "file_data": {
+                            "file_uri": image_url,
+                            "mime_type": _get_image_mime_type_from_url(image_url),
+                        }
+                    },
+                ],
+            }
+        ],
+        "system_instruction": {"parts": [{"text": "Be a good bot"}]},
+        "generationConfig": {},
+    }
+
+    messages = [
+        {"role": "system", "content": "Be a good bot"},
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Whats in this image?"},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ],
+        },
+    ]
+
+    client = HTTPHandler()
+    with patch.object(client, "post", new=MagicMock()) as mock_post:
+        mock_post.return_value = mock_response
+        try:
+            litellm.completion(
+                model="gemini/gemini-1.5-pro",
+                messages=messages,
+                client=client,
+            )
+        except Exception as e:
+            print(e)
+
+        # Assert the request body matches expected
+        mock_post.assert_called_once()
+        print("mock_post.call_args.kwargs['json']", mock_post.call_args.kwargs["json"])
+        assert mock_post.call_args.kwargs["json"] == expected_request_body