(feat) add support for gemini base64 images

2025-04-26 11:14:04 +00:00 · 2024-02-29 14:42:20 -08:00 · 2024-02-29 14:42:20 -08:00 · 8a96aa4ef0
commit 8a96aa4ef0
parent 3d2150afcc
3 changed files with 57 additions and 0 deletions
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@ -225,6 +225,17 @@ def _gemini_vision_convert_messages(messages: list):
                part_mime = "video/mp4"
                google_clooud_part = Part.from_uri(img, mime_type=part_mime)
                processed_images.append(google_clooud_part)
+            elif "base64" in img:
+                # Case 4: Images with base64 encoding
+                import base64
+
+                # the image arrives as a data URI: data:image/jpeg;base64,<base64-encoded-image>
+                img_without_base_64 = img.split(",")[1]
+                decoded_img = base64.b64decode(img_without_base_64)
+                processed_image = Part.from_data(
+                    data=decoded_img, mime_type="image/jpeg"
+                )
+                processed_images.append(processed_image)
        return prompt, processed_images
    except Exception as e:
        raise e
--- a/litellm/tests/cached_logo.jpg
+++ b/litellm/tests/cached_logo.jpg
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@ -336,6 +336,52 @@ def test_gemini_pro_vision():
 # test_gemini_pro_vision()


+def encode_image(image_path):
+    import base64
+
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
+
+@pytest.mark.skip(
+    reason="we already test gemini-pro-vision, this is just another way to pass images"
+)
+def test_gemini_pro_vision_base64():
+    try:
+        load_vertex_ai_credentials()
+        litellm.set_verbose = True
+        litellm.num_retries = 3
+        image_path = "cached_logo.jpg"
+        # Getting the base64 string
+        base64_image = encode_image(image_path)
+        resp = litellm.completion(
+            model="vertex_ai/gemini-pro-vision",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Whats in this image?"},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": "data:image/jpeg;base64," + base64_image
+                            },
+                        },
+                    ],
+                }
+            ],
+        )
+        print(resp)
+
+        prompt_tokens = resp.usage.prompt_tokens
+
+    except Exception as e:
+        if "500 Internal error encountered.'" in str(e):
+            pass
+        else:
+            pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_gemini_pro_function_calling():
    load_vertex_ai_credentials()
    tools = [