test(test_token_counter.py): include testing for function calls

2023-11-30 18:24:16 -08:00 · 2023-11-30 18:24:16 -08:00 · 7b99cfe6b9
commit 7b99cfe6b9
parent 7f04758bcb
3 changed files with 90 additions and 36 deletions
--- a/.gitignore
+++ b/.gitignore
@ -17,3 +17,4 @@ litellm_server/config.yaml
 litellm/proxy/_secret_config.yaml
 .aws-sam/
 litellm/tests/aiologs.log
 litellm/tests/exception_data.txt
--- a/litellm/tests/test_profiling_router.py
+++ b/litellm/tests/test_profiling_router.py
@ -15,6 +15,8 @@
 # from dotenv import load_dotenv
 # from aiodebug import log_slow_callbacks  # Import the aiodebug utility for logging slow callbacks
 # # litellm.telemetry = False
 # load_dotenv()
 # logging.basicConfig(
@ -25,6 +27,13 @@
 #     filemode='w'              # 'w' to overwrite the log file on each run, use 'a' to append
 # )
 # # Dictionary to store exception types and their counts
 # exception_counts = {}
 # exception_data = []
 # litellm.telemetry = False
 # num_task_cancelled_errors = 0 
 # model_list = [{
 #     "model_name": "azure-model",
@ -55,62 +64,89 @@
 # router = Router(model_list=model_list, set_verbose=False, num_retries=3)
 # async def router_completion(): 
 #     global num_task_cancelled_errors, exception_counts
 #     try: 
 #         messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}]
 #         response = await router.acompletion(model="azure-model", messages=messages)
 #         return response
 #     except asyncio.exceptions.CancelledError:
 #         exception_type = "CancelledError"
 #         exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
 #         print("Task was cancelled") 
 #         num_task_cancelled_errors += 1
 #         exception_data.append({
 #             "type": exception_type,
 #             "traceback": None
 #         })
 #         return None
 #     except Exception as e: 
 #         exception_type = type(e).__name__
 #         exception_counts[exception_type] = exception_counts.get(exception_type, 0) + 1
 #         exception_data.append({
 #             "type": exception_type,
 #             "traceback": traceback.format_exc()
 #         })
 #         return None
-# async def loadtest_fn(n = 1000):
+# async def loadtest_fn(n = 1452):
 #     global num_task_cancelled_errors, exception_counts
 #     start = time.time()
 #     tasks = [router_completion() for _ in range(n)]
 #     chat_completions = await asyncio.gather(*tasks)
 #     successful_completions = [c for c in chat_completions if c is not None]
 #     print(n, time.time() - start, len(successful_completions))
 #     # Print exception breakdown
 #     print("Exception Breakdown:")
 #     for exception_type, count in exception_counts.items():
 #         print(f"{exception_type}: {count}")
 #     # Store exception_data in a file
 #     with open('exception_data.txt', 'w') as file:
 #         for data in exception_data:
 #             file.write(f"Type: {data['type']}\n")
 #             if data['traceback']:
 #                 file.write(f"Traceback:\n{data['traceback']}\n\n")
-# # loop = asyncio.get_event_loop()
+# loop = asyncio.get_event_loop()
-# # loop.set_debug(True)
+# loop.set_debug(True)
-# # log_slow_callbacks.enable(0.05)  # Log callbacks slower than 0.05 seconds
+# log_slow_callbacks.enable(0.05)  # Log callbacks slower than 0.05 seconds
-# # # Excute the load testing function within the asyncio event loop
+# # Execute the load testing function within the asyncio event loop
-# # loop.run_until_complete(loadtest_fn())
+# loop.run_until_complete(loadtest_fn())
-# ### SUSTAINED LOAD TESTS ###
+# # ### SUSTAINED LOAD TESTS ###
-# import time, asyncio
+# # import time, asyncio
-# async def make_requests(n):
+# # async def make_requests(n):
-#     tasks = [router_completion() for _ in range(n)]
+# #     tasks = [router_completion() for _ in range(n)]
-#     print(f"num tasks: {len(tasks)}")
+# #     print(f"num tasks: {len(tasks)}")
-#     chat_completions = await asyncio.gather(*tasks)
+# #     chat_completions = await asyncio.gather(*tasks)
-#     successful_completions = [c for c in chat_completions if c is not None]
+# #     successful_completions = [c for c in chat_completions if c is not None]
-#     print(f"successful_completions: {len(successful_completions)}")
+# #     print(f"successful_completions: {len(successful_completions)}")
-#     return successful_completions
+# #     return successful_completions
-# async def main():
+# # async def main():
-#   start_time = time.time()
+# #   start_time = time.time()
-#   total_successful_requests = 0
+# #   total_successful_requests = 0
-#   request_limit = 1000
+# #   request_limit = 1000
-#   batches = 2  # batches of 1k requests
+# #   batches = 2  # batches of 1k requests
-#   start = time.time() 
+# #   start = time.time() 
-#   tasks = []  # list to hold all tasks
+# #   tasks = []  # list to hold all tasks
-#   async def request_loop():
+# #   async def request_loop():
-#     nonlocal tasks
+# #     nonlocal tasks
-#     for _ in range(batches):
+# #     for _ in range(batches):
-#         # Make 1,000 requests
+# #         # Make 1,000 requests
-#         task = asyncio.create_task(make_requests(request_limit))
+# #         task = asyncio.create_task(make_requests(request_limit))
-#         tasks.append(task)
+# #         tasks.append(task)
-#         # Introduce a delay to achieve 1,000 requests per second
+# #         # Introduce a delay to achieve 1,000 requests per second
-#         await asyncio.sleep(1)
+# #         await asyncio.sleep(1)
-#   await request_loop()
+# #   await request_loop()
-#   results = await asyncio.gather(*tasks)
+# #   results = await asyncio.gather(*tasks)
-#   total_successful_requests = sum(len(res) for res in results)
+# #   total_successful_requests = sum(len(res) for res in results)
-#   print(request_limit*batches, time.time() - start, total_successful_requests)
+# #   print(request_limit*batches, time.time() - start, total_successful_requests)
-# asyncio.run(main())
+# # asyncio.run(main())
--- a/litellm/tests/test_token_counter.py
+++ b/litellm/tests/test_token_counter.py
@ -11,6 +11,23 @@ import time
 from litellm import token_counter, encode, decode
 def test_token_counter_normal_plus_function_calling():
    """Smoke-test ``token_counter`` on a chat history that includes a tool call.

    The conversation mixes ordinary system/user/assistant turns with an
    assistant message whose ``content`` is ``None`` and which carries a
    ``tool_calls`` entry, followed by the matching ``tool`` reply. The token
    count is only printed; the test fails if any exception is raised while
    building the messages or counting them.
    """
    try:
        # The tool reply refers back to the assistant's call by id and name,
        # so both values are shared between the two messages.
        tool_call_id = 'call_E0lOb1h6qtmflUyok4L06TgY'
        tool_name = 'SearchInternet'

        assistant_tool_call = {
            'role': 'assistant',
            'content': None,  # no text content on the tool-call turn
            'tool_calls': [
                {
                    'id': tool_call_id,
                    'function': {
                        'arguments': '{"query":"search query","domain":"google.ca","gl":"ca","hl":"en"}',
                        'name': tool_name,
                    },
                    'type': 'function',
                },
            ],
        }
        tool_reply = {
            'tool_call_id': tool_call_id,
            'role': 'tool',
            'name': tool_name,
            'content': 'tool content',
        }

        conversation = [
            {'role': 'system', 'content': "System prompt"},
            {'role': 'user', 'content': 'content1'},
            {'role': 'assistant', 'content': 'content2'},
            {'role': 'user', 'content': 'conten3'},
            assistant_tool_call,
            tool_reply,
        ]

        tokens = token_counter(model="gpt-3.5-turbo", messages=conversation)
        print(f"tokens: {tokens}")
    except Exception as e:
        pytest.fail(f"An exception occurred - {e!s}")
 if __name__ == "__main__":
    # Run the test directly only when this file is executed as a script.
    # Calling it unconditionally at module level would execute it whenever
    # the module is imported (e.g. during pytest collection), in addition to
    # pytest running it as a collected test.
    test_token_counter_normal_plus_function_calling()
 def test_tokenizers():
    try: 
        ### test the openai, claude, cohere and llama2 tokenizers. 
@ -69,4 +86,4 @@ def test_encoding_and_decoding():
    except Exception as e: 
        pytest.fail(f'An exception occured: {e}')
-test_encoding_and_decoding() 
+# test_encoding_and_decoding()