litellm-mirror/litellm/tests/test_stream_chunk_builder.py


import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import dotenv
from openai import OpenAI

import litellm
from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()

user_message = "What is the current weather in Boston?"
messages = [{"content": user_message, "role": "user"}]

function_schema = {
    "name": "get_weather",
    "description": "gets the current weather",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        "required": ["location"],
    },
}

tools_schema = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]


# def test_stream_chunk_builder_tools():
#     try:
#         litellm.set_verbose = False
#         response = client.chat.completions.create(
#             model="gpt-3.5-turbo",
#             messages=messages,
#             tools=tools_schema,
#             # stream=True,
#             # complete_response=True # runs stream_chunk_builder under-the-hood
#         )
#         print(f"response: {response}")
#         print(f"response usage: {response.usage}")
#     except Exception as e:
#         pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_tools()
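

# The commented-out test above notes that `complete_response=True` runs
# `stream_chunk_builder` under the hood. The test below is a minimal sketch
# (not part of the original suite) of the manual equivalent: collect the raw
# chunks from a streamed call and rebuild a single response with
# `stream_chunk_builder(chunks, messages=...)`. The model choice and the
# content assertion are illustrative assumptions.
def test_stream_chunk_builder_manual_rebuild():
    try:
        chunks = []
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            stream=True,
        )
        for chunk in response:
            chunks.append(chunk)
        rebuilt = stream_chunk_builder(chunks, messages=messages)
        print(f"rebuilt response: {rebuilt}")
        assert rebuilt.choices[0].message.content is not None
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_manual_rebuild()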


def test_stream_chunk_builder_litellm_function_call():
    try:
        litellm.set_verbose = False
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            functions=[function_schema],
            # stream=True,
            # complete_response=True # runs stream_chunk_builder under-the-hood
        )
        print(f"response: {response}")
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_function_call()


def test_stream_chunk_builder_litellm_tool_call():
    try:
        litellm.set_verbose = True
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            tools=tools_schema,
            stream=True,
            complete_response=True,
        )
        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.usage.completion_tokens > 0
        assert response.usage.prompt_tokens > 0
        assert (
            response.usage.total_tokens
            == response.usage.completion_tokens + response.usage.prompt_tokens
        )
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_tool_call()


def test_stream_chunk_builder_litellm_tool_call_regular_message():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
        # litellm.set_verbose = True
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            tools=tools_schema,
            stream=True,
            complete_response=True,
        )
        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.usage.completion_tokens > 0
        assert response.usage.prompt_tokens > 0
        assert (
            response.usage.total_tokens
            == response.usage.completion_tokens + response.usage.prompt_tokens
        )

        # check provider is in hidden params
        print("hidden params", response._hidden_params)
        assert response._hidden_params["custom_llm_provider"] == "openai"
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_tool_call_regular_message()


def test_stream_chunk_builder_litellm_usage_chunks():
    """
    Checks that stream_chunk_builder correctly rebuilds usage metadata from streaming chunks.
    A manual stream_chunk_builder rebuild sketch follows this test.
    """
    messages = [
        {"role": "user", "content": "Tell me the funniest joke you know."},
        {
            "role": "assistant",
            "content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
        },
        {"role": "user", "content": "I do not know, why?"},
        {"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
        {"role": "user", "content": "\nI am waiting...\n\n...\n"},
    ]
    # make a regular gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
    )
    usage: litellm.Usage = response.usage
    gemini_pt = usage.prompt_tokens

    # make a streaming gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
        stream=True,
        complete_response=True,
        stream_options={"include_usage": True},
    )
    usage: litellm.Usage = response.usage
    stream_rebuilt_pt = usage.prompt_tokens

    # assert prompt tokens are the same
    assert gemini_pt == stream_rebuilt_pt
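

# A minimal follow-up sketch (not part of the original suite): rebuild the
# gemini streaming response by hand with `stream_chunk_builder` instead of
# `complete_response=True`, and check that usage is populated. The single-turn
# prompt and the `> 0` assertion are illustrative assumptions rather than an
# exact prompt-token comparison.
def test_stream_chunk_builder_manual_usage_rebuild():
    messages = [{"role": "user", "content": "Tell me the funniest joke you know."}]
    chunks = []
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
        stream=True,
        stream_options={"include_usage": True},
    )
    for chunk in response:
        chunks.append(chunk)
    rebuilt = stream_chunk_builder(chunks, messages=messages)
    print(f"rebuilt usage: {rebuilt.usage}")
    assert rebuilt.usage.prompt_tokens > 0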