Merge pull request #4119 from BerriAI/litellm_tiktoken_bump

feat(utils.py): bump tiktoken dependency to 0.7.0 (gpt-4o token counting support)
commit 7c23077a71
Krish Dholakia, 2024-06-11 18:24:58 -07:00, committed by GitHub
5 changed files with 200042 additions and 10 deletions
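The bump matters because gpt-4o uses the new o200k_base encoding, which ships only with tiktoken >= 0.7.0; older releases have no entry for gpt-4o and raise KeyError from encoding_for_model. A minimal sketch of what the new dependency enables (the sample text is illustrative):

```python
import tiktoken  # requires tiktoken >= 0.7.0 for o200k_base

# gpt-4o tokenizes with o200k_base rather than the gpt-4-era cl100k_base
enc = tiktoken.get_encoding("o200k_base")
print(len(enc.encode("Just some long text, long long text.")))
```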

File diff suppressed because it is too large (likely the bundled o200k_base tokenizer cache, which accounts for nearly all of the 200042 added lines).

litellm/tests/test_token_counter.py

@@ -187,12 +187,43 @@ def test_load_test_token_counter(model):
     print("model={}, total test time={}".format(model, total_time))
     assert total_time < 10, f"Total encoding time > 10s, {total_time}"


 def test_openai_token_with_image_and_text():
     model = "gpt-4o"
-    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    full_request = {
+        "model": "gpt-4o",
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "json",
+                    "parameters": {
+                        "type": "object",
+                        "required": ["clause"],
+                        "properties": {"clause": {"type": "string"}},
+                    },
+                    "description": "Respond with a JSON object.",
+                },
+            }
+        ],
+        "logprobs": False,
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
+                        "type": "text",
+                    }
+                ],
+            }
+        ],
+        "tool_choice": {"type": "function", "function": {"name": "json"}},
+        "exclude_models": [],
+        "disable_fallback": False,
+        "exclude_providers": [],
+    }
     messages = full_request.get("messages", [])
     token_count = token_counter(model=model, messages=messages)
     print(token_count)


 test_openai_token_with_image_and_text()
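For comparison, a hedged sketch of calling the counter directly, mirroring the test above (the model names are the only fixed inputs; the text is arbitrary):

```python
from litellm import token_counter

messages = [{"role": "user", "content": "Just some long text, long long text."}]

# gpt-4o routes through o200k_base while gpt-4 stays on cl100k_base,
# so the two counts may legitimately differ for identical text
print(token_counter(model="gpt-4o", messages=messages))
print(token_counter(model="gpt-4", messages=messages))
```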

litellm/utils.py

@@ -30,7 +30,7 @@ from dataclasses import (
     dataclass,
     field,
 )
-
+import os
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 from litellm.caching import DualCache
@@ -49,9 +49,9 @@ except (ImportError, AttributeError):
     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")

-    os.environ["TIKTOKEN_CACHE_DIR"] = (
-        filename  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
-    )
+    os.environ["TIKTOKEN_CACHE_DIR"] = os.getenv(
+        "CUSTOM_TIKTOKEN_CACHE_DIR", filename
+    )  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
     encoding = tiktoken.get_encoding("cl100k_base")

 from importlib import resources
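The os.getenv change above also lets a deployment point tiktoken at its own cache instead of the copy bundled with litellm. A minimal sketch, assuming a pre-populated cache directory (the path is illustrative):

```python
import os

# must be set before litellm is imported, since utils.py reads it at import time
os.environ["CUSTOM_TIKTOKEN_CACHE_DIR"] = "/opt/tiktoken_cache"

import litellm  # TIKTOKEN_CACHE_DIR now points at the custom directory
```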
@@ -4031,7 +4031,10 @@ def openai_token_counter(
     """
     print_verbose(f"LiteLLM: Utils - Counting tokens for OpenAI model={model}")
     try:
-        encoding = tiktoken.encoding_for_model(model)
+        if "gpt-4o" in model:
+            encoding = tiktoken.get_encoding("o200k_base")
+        else:
+            encoding = tiktoken.encoding_for_model(model)
     except KeyError:
         print_verbose("Warning: model not found. Using cl100k_base encoding.")
         encoding = tiktoken.get_encoding("cl100k_base")
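Lifted out of openai_token_counter, the selection logic behaves like this standalone sketch (pick_encoding is a hypothetical name, not litellm API):

```python
import tiktoken

def pick_encoding(model: str) -> tiktoken.Encoding:
    # route any gpt-4o variant to o200k_base; defer to tiktoken's model map
    # otherwise, and fall back to cl100k_base when the model is unknown
    try:
        if "gpt-4o" in model:
            return tiktoken.get_encoding("o200k_base")
        return tiktoken.encoding_for_model(model)
    except KeyError:
        return tiktoken.get_encoding("cl100k_base")

print(pick_encoding("gpt-4o-2024-05-13").name)   # o200k_base
print(pick_encoding("some-unknown-model").name)  # cl100k_base
```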

pyproject.toml

@@ -19,7 +19,7 @@ documentation = "https://docs.litellm.ai"
 python = ">=3.8.1,<4.0, !=3.9.7"
 openai = ">=1.27.0"
 python-dotenv = ">=0.2.0"
-tiktoken = ">=0.4.0"
+tiktoken = ">=0.7.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"

requirements.txt

@@ -34,7 +34,7 @@ opentelemetry-exporter-otlp==1.25.0
 ### LITELLM PACKAGE DEPENDENCIES
 python-dotenv==1.0.0 # for env
-tiktoken==0.6.0 # for calculating usage
+tiktoken==0.7.0 # for calculating usage
 importlib-metadata==6.8.0 # for random utils
 tokenizers==0.14.0 # for calculating usage
 click==8.1.7 # for proxy cli
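With both manifests now agreeing on the 0.7.0 floor, one quick way to confirm an environment actually satisfies it (a sketch; the packaging library is assumed to be installed):

```python
from importlib.metadata import version
from packaging.version import Version

# parse before comparing: plain string comparison would mis-order
# versions like "0.10.0" against "0.7.0"
assert Version(version("tiktoken")) >= Version("0.7.0")
```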