From f4c49755a044e98d0dca550bbe3280918db3ff8e Mon Sep 17 00:00:00 2001 From: Raymond Huang <1415176717@qq.com> Date: Wed, 5 Jun 2024 23:40:55 -0700 Subject: [PATCH 1/2] fix token counter bug --- litellm/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/utils.py b/litellm/utils.py index 178860094..a1ef3352b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4060,6 +4060,7 @@ def openai_token_counter( for c in value: if c["type"] == "text": text += c["text"] + num_tokens += len(encoding.encode(c["text"], disallowed_special=())) elif c["type"] == "image_url": if isinstance(c["image_url"], dict): image_url_dict = c["image_url"] From 968f54a65c3541df1bbb8ed0a035065e03498af5 Mon Sep 17 00:00:00 2001 From: Raymond Huang <1415176717@qq.com> Date: Thu, 6 Jun 2024 10:11:27 -0700 Subject: [PATCH 2/2] test --- litellm/tests/test_token_counter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/litellm/tests/test_token_counter.py b/litellm/tests/test_token_counter.py index 194dfb8af..a6f7cd761 100644 --- a/litellm/tests/test_token_counter.py +++ b/litellm/tests/test_token_counter.py @@ -186,3 +186,13 @@ def test_load_test_token_counter(model): total_time = end_time - start_time print("model={}, total test time={}".format(model, total_time)) assert total_time < 10, f"Total encoding time > 10s, {total_time}" + +def test_openai_token_with_image_and_text(): + model = "gpt-4o" + full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []} + messages = full_request.get("messages", []) + + token_count = token_counter(model=model, messages=messages) + print(token_count) + +test_openai_token_with_image_and_text() \ No newline at end of file