# mirror of https://github.com/BerriAI/litellm.git
# synced 2025-04-25 02:34:29 +00:00

#### What this tests ####
# This tests litellm.token_counter() function
import sys, os
|
||
import traceback
|
||
import pytest
|
||
|
||
sys.path.insert(
|
||
0, os.path.abspath("../..")
|
||
) # Adds the parent directory to the system path
|
||
import time
|
||
from litellm import token_counter, encode, decode
|
||
|
||
|
||
def test_token_counter_normal_plus_function_calling():
    """Count tokens for a conversation mixing plain chat messages with an
    assistant tool call (``tool_calls``) and a tool-role response.

    Exceptions propagate naturally: the original ``try/except Exception:
    pytest.fail(...)`` wrapper only flattened the traceback into a string —
    pytest already fails the test on any uncaught exception, with a far more
    useful report.
    """
    messages = [
        {"role": "system", "content": "System prompt"},
        {"role": "user", "content": "content1"},
        {"role": "assistant", "content": "content2"},
        {"role": "user", "content": "conten3"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_E0lOb1h6qtmflUyok4L06TgY",
                    "function": {
                        "arguments": '{"query":"search query","domain":"google.ca","gl":"ca","hl":"en"}',
                        "name": "SearchInternet",
                    },
                    "type": "function",
                }
            ],
        },
        {
            "tool_call_id": "call_E0lOb1h6qtmflUyok4L06TgY",
            "role": "tool",
            "name": "SearchInternet",
            "content": "tool content",
        },
    ]
    tokens = token_counter(model="gpt-3.5-turbo", messages=messages)
    print(f"tokens: {tokens}")
    # The original only printed; a non-empty conversation must count > 0.
    assert tokens > 0
|
||
|
||
|
||
# test_token_counter_normal_plus_function_calling()
|
||
|
||
|
||
def test_tokenizers():
    """Run the same sample text through the openai, claude, cohere and
    llama2 tokenizers and check the counts differ between backends.

    Bug fixed: the original assertion ``openai != cohere != llama2`` chains
    as ``(openai != cohere) and (cohere != llama2)`` and never compares
    openai with llama2, so two equal outer values slipped through. A set
    holds one entry per distinct value, so its length pins all pairs.
    """
    ### test the openai, claude, cohere and llama2 tokenizers.
    ### The tokenizer value should be different for all
    sample_text = "Hellö World, this is my input string!"

    # openai tokenizer
    openai_tokens = token_counter(model="gpt-3.5-turbo", text=sample_text)

    # claude tokenizer
    claude_tokens = token_counter(model="claude-instant-1", text=sample_text)

    # cohere tokenizer
    cohere_tokens = token_counter(model="command-nightly", text=sample_text)

    # llama2 tokenizer
    llama2_tokens = token_counter(
        model="meta-llama/Llama-2-7b-chat", text=sample_text
    )

    print(
        f"openai tokens: {openai_tokens}; claude tokens: {claude_tokens}; cohere tokens: {cohere_tokens}; llama2 tokens: {llama2_tokens}"
    )

    # assert that all token values are different
    # NOTE(review): claude_tokens was excluded from the original comparison
    # too — presumably its count may collide with another backend; confirm
    # before adding it to the set.
    assert (
        len({openai_tokens, cohere_tokens, llama2_tokens}) == 3
    ), "Token values are not different."

    print("test tokenizer: It worked!")
|
||
|
||
|
||
# test_tokenizers()
|
||
|
||
|
||
def test_encoding_and_decoding():
    """Round-trip ``encode`` -> ``decode`` for the openai, claude, cohere
    and llama2 tokenizers and check the original text is recovered exactly.

    Exceptions propagate naturally: the original ``try/except Exception:
    pytest.fail(...)`` wrapper discarded the traceback that pytest would
    otherwise report for free.
    """
    sample_text = "Hellö World, this is my input string!"

    # openai encoding + decoding
    # The openai path passes the encode() result straight to decode(),
    # so it is evidently already a plain token sequence.
    openai_tokens = encode(model="gpt-3.5-turbo", text=sample_text)
    openai_text = decode(model="gpt-3.5-turbo", tokens=openai_tokens)
    assert openai_text == sample_text

    # claude encoding + decoding
    # NOTE(review): the non-openai backends access `.ids`, which looks like
    # a HuggingFace-tokenizers Encoding object — confirm against litellm.
    claude_tokens = encode(model="claude-instant-1", text=sample_text)
    claude_text = decode(model="claude-instant-1", tokens=claude_tokens.ids)
    assert claude_text == sample_text

    # cohere encoding + decoding
    cohere_tokens = encode(model="command-nightly", text=sample_text)
    cohere_text = decode(model="command-nightly", tokens=cohere_tokens.ids)
    assert cohere_text == sample_text

    # llama2 encoding + decoding
    llama2_tokens = encode(model="meta-llama/Llama-2-7b-chat", text=sample_text)
    llama2_text = decode(
        model="meta-llama/Llama-2-7b-chat", tokens=llama2_tokens.ids
    )
    assert llama2_text == sample_text
|
||
|
||
|
||
# test_encoding_and_decoding()
|
||
|
||
|
||
def test_gpt_vision_token_counting():
    """Token counting for a multimodal (text + image_url) user message with
    a vision-capable model.

    The original version only printed the count and asserted nothing, so it
    could never fail on a wrong result; assert a positive count so the test
    actually verifies something.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                },
            ],
        }
    ]
    tokens = token_counter(model="gpt-4-vision-preview", messages=messages)
    print(f"tokens: {tokens}")
    # A non-empty multimodal message must yield a positive token count.
    assert tokens > 0
|
||
|
||
|
||
# test_gpt_vision_token_counting()
|