(cookbook) load test litellm router

2025-04-27 11:43:54 +00:00 · 2024-02-08 07:24:28 -08:00 · 2024-02-08 07:24:28 -08:00 · 28b3ae45fa
commit 28b3ae45fa
parent aec4572285
4 changed files with 271 additions and 11 deletions
--- a/litellm/tests/test_loadtest_router.py
+++ b/litellm/tests/test_loadtest_router.py
@@ -9,6 +9,7 @@
 # )  # Adds the parent directory to the system path
 # import asyncio
 # from litellm import Router, Timeout
+# import time


 # async def call_acompletion(semaphore, router: Router, input_data):
@@ -17,6 +18,7 @@
 #             # Use asyncio.wait_for to set a timeout for the task
 #             response = await router.acompletion(**input_data)
 #             # Handle the response as needed
+#             print(response)
 #             return response
 #         except Timeout:
 #             print(f"Task timed out: {input_data}")
@@ -42,15 +44,6 @@
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #             },
 #         },
-#         {
-#             "model_name": "gpt-3.5-turbo",
-#             "litellm_params": {
-#                 "model": "azure/chatgpt-functioncalling",
-#                 "api_key": os.getenv("AZURE_API_KEY"),
-#                 "api_base": os.getenv("AZURE_API_BASE"),
-#                 "api_version": os.getenv("AZURE_API_VERSION"),
-#             },
-#         },
 #     ]
 #     router = Router(model_list=model_list, num_retries=3, timeout=10)

@@ -59,9 +52,9 @@

 #     # List to hold all task references
 #     tasks = []
-
+#     start_time_all_tasks = time.time()
 #     # Launch the load-test tasks (count is set by the range() below)
-#     for _ in range(1000):
+#     for _ in range(500):
 #         task = asyncio.create_task(
 #             call_acompletion(
 #                 semaphore,
@@ -77,6 +70,15 @@
 #     # Wait for all tasks to complete
 #     responses = await asyncio.gather(*tasks)
 #     # Process responses as needed
+#     # Record the end time for all tasks
+#     end_time_all_tasks = time.time()
+#     # Calculate the total time for all tasks
+#     total_time_all_tasks = end_time_all_tasks - start_time_all_tasks
+#     print(f"Total time for all tasks: {total_time_all_tasks} seconds")
+
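+#     # Average wall-clock time per response: total elapsed time divided by
+#     # the number of responses. Tasks are gathered together rather than run
+#     # one by one, so this is not a per-request latency.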
+#     average_time_per_response = total_time_all_tasks / len(responses)
+#     print(f"Average time per response: {average_time_per_response} seconds")
 #     print(f"NUMBER OF COMPLETED TASKS: {len(responses)}")