import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import dotenv
from openai import OpenAI

import litellm
from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()

user_message = "What is the current weather in Boston?"
messages = [{"content": user_message, "role": "user"}]

function_schema = {
    "name": "get_weather",
    "description": "gets the current weather",
    "parameters": {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        "required": ["location"],
    },
}

tools_schema = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]


# def test_stream_chunk_builder_tools():
#     try:
#         litellm.set_verbose = False
#         response = client.chat.completions.create(
#             model="gpt-3.5-turbo",
#             messages=messages,
#             tools=tools_schema,
#             # stream=True,
#             # complete_response=True # runs stream_chunk_builder under-the-hood
#         )

#         print(f"response: {response}")
#         print(f"response usage: {response.usage}")
#     except Exception as e:
#         pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_tools()


def test_stream_chunk_builder_litellm_function_call():
    try:
        litellm.set_verbose = False
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            functions=[function_schema],
            # stream=True,
            # complete_response=True # runs stream_chunk_builder under-the-hood
        )

        print(f"response: {response}")
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_function_call()


def test_stream_chunk_builder_litellm_tool_call():
    try:
        litellm.set_verbose = True
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            tools=tools_schema,
            stream=True,
            complete_response=True,  # runs stream_chunk_builder under-the-hood
        )

        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.usage.completion_tokens > 0
        assert response.usage.prompt_tokens > 0
        assert (
            response.usage.total_tokens
            == response.usage.completion_tokens + response.usage.prompt_tokens
        )
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_tool_call()


def test_stream_chunk_builder_litellm_tool_call_regular_message():
    try:
        messages = [{"role": "user", "content": "Hey, how's it going?"}]
        # litellm.set_verbose = True
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            tools=tools_schema,
            stream=True,
            complete_response=True,  # runs stream_chunk_builder under-the-hood
        )

        print(f"complete response: {response}")
        print(f"complete response usage: {response.usage}")
        assert response.usage.completion_tokens > 0
        assert response.usage.prompt_tokens > 0
        assert (
            response.usage.total_tokens
            == response.usage.completion_tokens + response.usage.prompt_tokens
        )

        # check provider is in hidden params
        print("hidden params", response._hidden_params)
        assert response._hidden_params["custom_llm_provider"] == "openai"
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_litellm_tool_call_regular_message()
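# A minimal sketch (added example, not part of the original suite) of the
# direct stream_chunk_builder path that complete_response=True runs
# under-the-hood: collect the raw chunks yourself, then rebuild one complete
# response object. Assumes the documented call shape
# stream_chunk_builder(chunks, messages=...); the test name is hypothetical.
def test_stream_chunk_builder_manual_chunks():
    try:
        chunks = []
        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            stream=True,
        )
        for chunk in response:
            chunks.append(chunk)

        # passing messages= lets the builder count prompt tokens when the
        # provider does not attach usage to the stream
        rebuilt = stream_chunk_builder(chunks, messages=messages)
        print(f"rebuilt response: {rebuilt}")
        assert rebuilt.choices[0].message.content is not None
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_manual_chunks()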
def test_stream_chunk_builder_litellm_usage_chunks():
    """
    Checks that stream_chunk_builder correctly rebuilds usage metadata
    from streaming chunks.
    """
    messages = [
        {"role": "user", "content": "Tell me the funniest joke you know."},
        {
            "role": "assistant",
            "content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
        },
        {"role": "user", "content": "I do not know, why?"},
        {"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
        {"role": "user", "content": "\nI am waiting...\n\n...\n"},
    ]

    # make a regular (non-streaming) gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
    )

    usage: litellm.Usage = response.usage
    gemini_pt = usage.prompt_tokens

    # make a streaming gemini call; complete_response=True rebuilds the
    # chunks via stream_chunk_builder
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
        stream=True,
        complete_response=True,
        stream_options={"include_usage": True},
    )

    usage: litellm.Usage = response.usage
    stream_rebuilt_pt = usage.prompt_tokens

    # prompt tokens should match between the two paths
    assert gemini_pt == stream_rebuilt_pt


# test_stream_chunk_builder_litellm_usage_chunks()
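# A second sketch (hypothetical, added for illustration): rebuild a stream by
# hand and check the usage block is internally consistent. This assumes usage
# is carried on the final chunk when stream_options={"include_usage": True}
# is set; if a provider omits it, litellm may fall back to estimating tokens
# from the chunks and the messages passed in.
def test_stream_chunk_builder_manual_usage_chunks():
    try:
        sample_messages = [{"role": "user", "content": "Hey, how's it going?"}]
        chunks = []
        response = completion(
            model="gpt-3.5-turbo",
            messages=sample_messages,
            stream=True,
            stream_options={"include_usage": True},
        )
        for chunk in response:
            chunks.append(chunk)

        rebuilt = stream_chunk_builder(chunks, messages=sample_messages)
        usage: litellm.Usage = rebuilt.usage
        print(f"rebuilt usage: {usage}")
        assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# test_stream_chunk_builder_manual_usage_chunks()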