From 7b99cfe6b9b999db3b2b446d916b95067682fd04 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 30 Nov 2023 18:24:16 -0800
Subject: [PATCH] test(test_token_counter.py): include testing for function calls

---
 .gitignore                             |   1 +
 litellm/tests/test_profiling_router.py | 106 +++++++++++++++++--------
 litellm/tests/test_token_counter.py    |  19 ++++-
 3 files changed, 90 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index 62e898dac..befb64508 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,4 @@ litellm_server/config.yaml
 litellm/proxy/_secret_config.yaml
 .aws-sam/
 litellm/tests/aiologs.log
+litellm/tests/exception_data.txt
diff --git a/litellm/tests/test_profiling_router.py b/litellm/tests/test_profiling_router.py
index e3b291db7..48ed9cb0e 100644
--- a/litellm/tests/test_profiling_router.py
+++ b/litellm/tests/test_profiling_router.py
@@ -15,6 +15,8 @@
 # from dotenv import load_dotenv
 # from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks

+# # litellm.telemetry = False
+
 # load_dotenv()

 # logging.basicConfig(
@@ -25,6 +27,13 @@
 #     filemode='w' # 'w' to overwrite the log file on each run, use 'a' to append
 # )

+# # Dictionary to store exception types and their counts
+# exception_counts = {}
+# exception_data = []
+
+# litellm.telemetry = False
+
+# num_task_cancelled_errors = 0

 # model_list = [{
 #     "model_name": "azure-model",
@@ -55,62 +64,89 @@
 # router = Router(model_list=model_list, set_verbose=False, num_retries=3)

 # async def router_completion():
+#     global num_task_cancelled_errors, exception_counts
 #     try:
 #         messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}]
 #         response = await router.acompletion(model="azure-model", messages=messages)
 #         return response
 #     except asyncio.exceptions.CancelledError:
+#         exception_type = "CancelledError"
+#         exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
 #         print("Task was cancelled")
+#         num_task_cancelled_errors += 1
+#         exception_data.append({
+#             "type": exception_type,
+#             "traceback": None
+#         })
 #         return None
 #     except Exception as e:
+#         exception_type = type(e).__name__
+#         exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
+#         exception_data.append({
+#             "type": exception_type,
+#             "traceback": traceback.format_exc()
+#         })
 #         return None

-# async def loadtest_fn(n = 1000):
+# async def loadtest_fn(n = 1452):
+#     global num_task_cancelled_errors, exception_counts
 #     start = time.time()
 #     tasks = [router_completion() for _ in range(n)]
 #     chat_completions = await asyncio.gather(*tasks)
 #     successful_completions = [c for c in chat_completions if c is not None]
 #     print(n, time.time() - start, len(successful_completions))
+
+#     # Print exception breakdown
+#     print("Exception Breakdown:")
+#     for exception_type, count in exception_counts.items():
+#         print(f"{exception_type}: {count}")
+
+#     # Store exception_data in a file
+#     with open('exception_data.txt', 'w') as file:
+#         for data in exception_data:
+#             file.write(f"Type: {data['type']}\n")
+#             if data['traceback']:
+#                 file.write(f"Traceback:\n{data['traceback']}\n\n")

-# # loop = asyncio.get_event_loop()
-# # loop.set_debug(True)
-# # log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds
+# loop = asyncio.get_event_loop()
+# loop.set_debug(True)
+# log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds

-# # # Execute the load testing function within the asyncio event loop
-# # loop.run_until_complete(loadtest_fn())
+# # Execute the load testing function within the asyncio event loop
+# loop.run_until_complete(loadtest_fn())

-# ### SUSTAINED LOAD TESTS ###
-# import time, asyncio
-# async def make_requests(n):
-#     tasks = [router_completion() for _ in range(n)]
-#     print(f"num tasks: {len(tasks)}")
-#     chat_completions = await asyncio.gather(*tasks)
-#     successful_completions = [c for c in chat_completions if c is not None]
-#     print(f"successful_completions: {len(successful_completions)}")
-#     return successful_completions
+# # ### SUSTAINED LOAD TESTS ###
+# # import time, asyncio
+# # async def make_requests(n):
+# #     tasks = [router_completion() for _ in range(n)]
+# #     print(f"num tasks: {len(tasks)}")
+# #     chat_completions = await asyncio.gather(*tasks)
+# #     successful_completions = [c for c in chat_completions if c is not None]
+# #     print(f"successful_completions: {len(successful_completions)}")
+# #     return successful_completions

-# async def main():
-#     start_time = time.time()
-#     total_successful_requests = 0
-#     request_limit = 1000
-#     batches = 2 # batches of 1k requests
-#     start = time.time()
-#     tasks = [] # list to hold all tasks
+# # async def main():
+# #     start_time = time.time()
+# #     total_successful_requests = 0
+# #     request_limit = 1000
+# #     batches = 2 # batches of 1k requests
+# #     start = time.time()
+# #     tasks = [] # list to hold all tasks

-#     async def request_loop():
-#         nonlocal tasks
-#         for _ in range(batches):
-#             # Make 1,000 requests
-#             task = asyncio.create_task(make_requests(request_limit))
-#             tasks.append(task)
+# #     async def request_loop():
+# #         nonlocal tasks
+# #         for _ in range(batches):
+# #             # Make 1,000 requests
+# #             task = asyncio.create_task(make_requests(request_limit))
+# #             tasks.append(task)

-#             # Introduce a delay to achieve 1,000 requests per second
-#             await asyncio.sleep(1)
+# #             # Introduce a delay to achieve 1,000 requests per second
+# #             await asyncio.sleep(1)

-#     await request_loop()
-#     results = await asyncio.gather(*tasks)
-#     total_successful_requests = sum(len(res) for res in results)
+# #     await request_loop()
+# #     results = await asyncio.gather(*tasks)
+# #     total_successful_requests = sum(len(res) for res in results)

-#     print(request_limit*batches, time.time() - start, total_successful_requests)
+# #     print(request_limit*batches, time.time() - start, total_successful_requests)

-# asyncio.run(main())
\ No newline at end of file
+# # asyncio.run(main())
\ No newline at end of file
diff --git a/litellm/tests/test_token_counter.py b/litellm/tests/test_token_counter.py
index fbe2b977a..b30e1126d 100644
--- a/litellm/tests/test_token_counter.py
+++ b/litellm/tests/test_token_counter.py
@@ -11,6 +11,23 @@ import time
 from litellm import token_counter, encode, decode

+def test_token_counter_normal_plus_function_calling():
+    try:
+        messages = [
+            {'role': 'system', 'content': "System prompt"},
+            {'role': 'user', 'content': 'content1'},
+            {'role': 'assistant', 'content': 'content2'},
+            {'role': 'user', 'content': 'content3'},
+            {'role': 'assistant', 'content': None, 'tool_calls': [{'id': 'call_E0lOb1h6qtmflUyok4L06TgY', 'function': {'arguments': '{"query":"search query","domain":"google.ca","gl":"ca","hl":"en"}', 'name': 'SearchInternet'}, 'type': 'function'}]},
+            {'tool_call_id': 'call_E0lOb1h6qtmflUyok4L06TgY', 'role': 'tool', 'name': 'SearchInternet', 'content': 'tool content'}
+        ]
+        tokens = token_counter(model="gpt-3.5-turbo", messages=messages)
+        print(f"tokens: {tokens}")
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+test_token_counter_normal_plus_function_calling()
+
 def test_tokenizers():
     try:
         ### test the openai, claude, cohere and llama2 tokenizers.
@@ -69,4 +86,4 @@ def test_encoding_and_decoding():
     except Exception as e:
         pytest.fail(f'An exception occurred: {e}')

-test_encoding_and_decoding()
\ No newline at end of file
+# test_encoding_and_decoding()
\ No newline at end of file
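
Notes (sketches, not part of the patch):

The profiling changes above keep the load-test script commented out, but the pattern they add is reusable: bucket exceptions by type while gathering a burst of coroutines, then dump the breakdown to a file. Below is a minimal standalone sketch of that pattern, assuming a hypothetical flaky_task coroutine in place of router_completion(); only the counting and file-dump logic mirrors the patch.

    import asyncio
    import random
    import traceback

    exception_counts = {}  # exception type name -> count
    exception_data = []    # one record per caught exception

    async def flaky_task(i):
        # Hypothetical stand-in for router_completion(); fails ~30% of the time.
        await asyncio.sleep(0.01)
        if random.random() < 0.3:
            raise RuntimeError(f"simulated failure on task {i}")
        return i

    async def guarded(i):
        try:
            return await flaky_task(i)
        except Exception as e:
            exception_type = type(e).__name__
            exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
            exception_data.append({"type": exception_type, "traceback": traceback.format_exc()})
            return None

    async def loadtest(n=100):
        results = await asyncio.gather(*(guarded(i) for i in range(n)))
        successful = [r for r in results if r is not None]
        print(n, len(successful))
        # Print exception breakdown, as in the patch
        print("Exception Breakdown:")
        for exception_type, count in exception_counts.items():
            print(f"{exception_type}: {count}")
        # Same dump format the patch writes to exception_data.txt
        with open("exception_data.txt", "w") as file:
            for data in exception_data:
                file.write(f"Type: {data['type']}\n")
                if data["traceback"]:
                    file.write(f"Traceback:\n{data['traceback']}\n\n")

    asyncio.run(loadtest())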
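
For the new test itself, here is a minimal sketch of exercising litellm.token_counter on a conversation that includes a tool call, mirroring the message shape the test uses. The call id, the get_weather function, and its arguments are hypothetical; the exact count depends on the model's tokenizer, so only positivity is checked.

    from litellm import token_counter

    messages = [
        {"role": "user", "content": "What is the weather in Vancouver?"},
        # Assistant turn carrying a tool call instead of text content
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_abc123",  # hypothetical tool-call id
                    "type": "function",
                    "function": {
                        "name": "get_weather",  # hypothetical function name
                        "arguments": '{"location": "Vancouver, BC"}',
                    },
                }
            ],
        },
        # Tool result echoed back with the matching tool_call_id
        {
            "tool_call_id": "call_abc123",
            "role": "tool",
            "name": "get_weather",
            "content": "12C and raining",
        },
    ]

    tokens = token_counter(model="gpt-3.5-turbo", messages=messages)
    assert tokens > 0  # exact value depends on the tokenizer
    print(f"tokens: {tokens}")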