diff --git a/litellm/tests/test_token_counter.py b/litellm/tests/test_token_counter.py index b6dca32c6..b32e63fbf 100644 --- a/litellm/tests/test_token_counter.py +++ b/litellm/tests/test_token_counter.py @@ -356,3 +356,22 @@ def test_gpt_4o_token_counter(): ) mock_client.assert_called() + + +@pytest.mark.parametrize( + "img_url", + [ + "https://blog.purpureus.net/assets/blog/personal_key_rotation/simplified-asset-graph.jpg", + "", + ], +) +def test_img_url_token_counter(img_url): + + from litellm.utils import get_image_dimensions + + width, height = get_image_dimensions(data=img_url) + + print(width, height) + + assert width is not None + assert height is not None diff --git a/litellm/utils.py b/litellm/utils.py index 40564c107..c2dcc0eb3 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -14,7 +14,9 @@ import binascii import copy import datetime import hashlib +import imghdr import inspect +import io import itertools import json import logging @@ -1797,35 +1799,41 @@ def calculate_tiles_needed( def get_image_dimensions(data): img_data = None - # Check if data is a URL by trying to parse it try: - response = requests.get(data) - response.raise_for_status() # Check if the request was successful - img_data = response.content + # Try to open as URL + # Try to open as URL + client = HTTPHandler(concurrent_limit=1) + response = client.get(data) + img_data = response.read() except Exception: - # Data is not a URL, handle as base64 + # If not URL, assume it's base64 header, encoded = data.split(",", 1) img_data = base64.b64decode(encoded) - # Try to determine dimensions from headers - # This is a very simplistic check, primarily works with PNG and non-progressive JPEG - if img_data[:8] == b"\x89PNG\r\n\x1a\n": - # PNG Image; width and height are 4 bytes each and start at offset 16 - width, height = struct.unpack(">ii", img_data[16:24]) - return width, height - elif img_data[:2] == b"\xff\xd8": - # JPEG Image; for dimensions, SOF0 block (0xC0) gives dimensions at offset 3 for length, and then 5 and 7 for height and width - # This will NOT find dimensions for all JPEGs (e.g., progressive JPEGs) - # Find SOF0 marker (0xFF followed by 0xC0) - sof = re.search(b"\xff\xc0....", img_data) - if sof: - # Parse SOF0 block to find dimensions - height, width = struct.unpack(">HH", sof.group()[5:9]) - return width, height - else: - return None, None + img_type = imghdr.what(None, h=img_data) + + if img_type == "png": + w, h = struct.unpack(">LL", img_data[16:24]) + return w, h + elif img_type == "gif": + w, h = struct.unpack("H", fhandle.read(2))[0] - 2 + fhandle.seek(1, 1) + h, w = struct.unpack(">HH", fhandle.read(4)) + return w, h else: - # Unsupported format return None, None