diff --git a/cookbook/litellm_router_load_test/test_loadtest_openai_client.py b/cookbook/litellm_router_load_test/test_loadtest_openai_client.py
new file mode 100644
index 000000000..d11249cd2
--- /dev/null
+++ b/cookbook/litellm_router_load_test/test_loadtest_openai_client.py
@@ -0,0 +1,76 @@
+import sys, os
+import traceback
+from dotenv import load_dotenv
+import copy
+
+load_dotenv()
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import asyncio
+from litellm import Router, Timeout
+import time
+from litellm.caching import Cache
+import litellm
+import openai
+
+### Test just calling AsyncAzureOpenAI
+
+openai_client = openai.AsyncAzureOpenAI(
+    azure_endpoint=os.getenv("AZURE_API_BASE"),
+    api_key=os.getenv("AZURE_API_KEY"),
+)
+
+
+async def call_acompletion(semaphore, input_data):
+    async with semaphore:
+        try:
+            # The client enforces its own request timeout
+            response = await openai_client.chat.completions.create(**input_data)
+            # Handle the response as needed
+            print(response)
+            return response
+        except openai.APITimeoutError:
+            print(f"Task timed out: {input_data}")
+            return None  # You may choose to return something else or raise an exception
+
+
+async def main():
+    # No Router here; this script calls the AsyncAzureOpenAI client directly
+
+    # Create a semaphore with a capacity of 100
+    semaphore = asyncio.Semaphore(100)
+
+    # List to hold all task references
+    tasks = []
+    start_time_all_tasks = time.time()
+    # Launch 500 tasks
+    for _ in range(500):
+        task = asyncio.create_task(
+            call_acompletion(
+                semaphore,
+                {
+                    "model": "chatgpt-v-2",
+                    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
+                },
+            )
+        )
+        tasks.append(task)
+
+    # Wait for all tasks to complete
+    responses = await asyncio.gather(*tasks)
+    # Process responses as needed
+    # Record the end time for all tasks
+    end_time_all_tasks = time.time()
+    # Calculate the total time for all tasks
+    total_time_all_tasks = end_time_all_tasks - start_time_all_tasks
+    print(f"Total time for all tasks: {total_time_all_tasks} seconds")
+
+    # Calculate the average time per response
+    average_time_per_response = total_time_all_tasks / len(responses)
+    print(f"Average time per response: {average_time_per_response} seconds")
+    print(f"NUMBER OF COMPLETED TASKS: {len(responses)}")
+
+
+# Run the main function
+asyncio.run(main())
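All three scripts in this change share the same shape: an `asyncio.Semaphore(100)` caps in-flight requests while 500 tasks are created up front, and the script reports wall-clock totals at the end. If per-request latency percentiles are also wanted, a thin wrapper can collect them. The sketch below is illustrative only, not part of the diff; `timed_acompletion` and `report_latencies` are hypothetical helpers around the `call_acompletion` defined above:

```python
# A sketch, not part of this PR: time each request individually so the load
# test can report latency percentiles alongside the wall-clock totals.
import asyncio
import statistics
import time


async def timed_acompletion(semaphore, input_data):
    start = time.time()
    # Note: this also counts time spent queued on the semaphore, which is
    # usually what you want in a load test (client-perceived latency).
    response = await call_acompletion(semaphore, input_data)
    return response, time.time() - start


async def report_latencies(tasks):
    # tasks are created from timed_acompletion instead of call_acompletion
    results = await asyncio.gather(*tasks)
    latencies = [elapsed for response, elapsed in results if response is not None]
    cuts = statistics.quantiles(latencies, n=20)  # 19 cut points
    print(f"p50: {cuts[9]:.2f}s  p95: {cuts[18]:.2f}s")
```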
"gpt-3.5-turbo", + "litellm_params": { + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_base": os.getenv("AZURE_API_BASE"), + "api_version": os.getenv("AZURE_API_VERSION"), + }, + }, + ] + router = Router(model_list=model_list, num_retries=3, timeout=10) + + # Create a semaphore with a capacity of 100 + semaphore = asyncio.Semaphore(100) + + # List to hold all task references + tasks = [] + start_time_all_tasks = time.time() + # Launch 1000 tasks + for _ in range(500): + task = asyncio.create_task( + call_acompletion( + semaphore, + router, + { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": "Hey, how's it going?"}], + }, + ) + ) + tasks.append(task) + + # Wait for all tasks to complete + responses = await asyncio.gather(*tasks) + # Process responses as needed + # Record the end time for all tasks + end_time_all_tasks = time.time() + # Calculate the total time for all tasks + total_time_all_tasks = end_time_all_tasks - start_time_all_tasks + print(f"Total time for all tasks: {total_time_all_tasks} seconds") + + # Calculate the average time per response + average_time_per_response = total_time_all_tasks / len(responses) + print(f"Average time per response: {average_time_per_response} seconds") + print(f"NUMBER OF COMPLETED TASKS: {len(responses)}") + + +# Run the main function +asyncio.run(main()) diff --git a/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py b/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py new file mode 100644 index 000000000..5e6dd218c --- /dev/null +++ b/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py @@ -0,0 +1,94 @@ +import sys, os +import traceback +from dotenv import load_dotenv +import copy + +load_dotenv() +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import asyncio +from litellm import Router, Timeout +import time +from litellm.caching import Cache +import litellm + +litellm.cache = Cache( + type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" +) + +### Test calling router with s3 Cache + + +async def call_acompletion(semaphore, router: Router, input_data): + async with semaphore: + try: + # Use asyncio.wait_for to set a timeout for the task + response = await router.acompletion(**input_data) + # Handle the response as needed + print(response) + return response + except Timeout: + print(f"Task timed out: {input_data}") + return None # You may choose to return something else or raise an exception + + +async def main(): + # Initialize the Router + model_list = [ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + }, + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_base": os.getenv("AZURE_API_BASE"), + "api_version": os.getenv("AZURE_API_VERSION"), + }, + }, + ] + router = Router(model_list=model_list, num_retries=3, timeout=10) + + # Create a semaphore with a capacity of 100 + semaphore = asyncio.Semaphore(100) + + # List to hold all task references + tasks = [] + start_time_all_tasks = time.time() + # Launch 1000 tasks + for _ in range(500): + task = asyncio.create_task( + call_acompletion( + semaphore, + router, + { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": "Hey, how's it going?"}], + }, + ) + ) + tasks.append(task) + + # Wait for all tasks to complete + responses = 
diff --git a/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py b/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py
new file mode 100644
index 000000000..5e6dd218c
--- /dev/null
+++ b/cookbook/litellm_router_load_test/test_loadtest_router_withs3_cache.py
@@ -0,0 +1,94 @@
+import sys, os
+import traceback
+from dotenv import load_dotenv
+import copy
+
+load_dotenv()
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import asyncio
+from litellm import Router, Timeout
+import time
+from litellm.caching import Cache
+import litellm
+
+litellm.cache = Cache(
+    type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
+)
+
+### Test calling router with s3 Cache
+
+
+async def call_acompletion(semaphore, router: Router, input_data):
+    async with semaphore:
+        try:
+            # The router enforces its own timeout and retries
+            response = await router.acompletion(**input_data)
+            # Handle the response as needed
+            print(response)
+            return response
+        except Timeout:
+            print(f"Task timed out: {input_data}")
+            return None  # You may choose to return something else or raise an exception
+
+
+async def main():
+    # Initialize the Router
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_base": os.getenv("AZURE_API_BASE"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+            },
+        },
+    ]
+    router = Router(model_list=model_list, num_retries=3, timeout=10)
+
+    # Create a semaphore with a capacity of 100
+    semaphore = asyncio.Semaphore(100)
+
+    # List to hold all task references
+    tasks = []
+    start_time_all_tasks = time.time()
+    # Launch 500 tasks
+    for _ in range(500):
+        task = asyncio.create_task(
+            call_acompletion(
+                semaphore,
+                router,
+                {
+                    "model": "gpt-3.5-turbo",
+                    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
+                },
+            )
+        )
+        tasks.append(task)
+
+    # Wait for all tasks to complete
+    responses = await asyncio.gather(*tasks)
+    # Process responses as needed
+    # Record the end time for all tasks
+    end_time_all_tasks = time.time()
+    # Calculate the total time for all tasks
+    total_time_all_tasks = end_time_all_tasks - start_time_all_tasks
+    print(f"Total time for all tasks: {total_time_all_tasks} seconds")
+
+    # Calculate the average time per response
+    average_time_per_response = total_time_all_tasks / len(responses)
+    print(f"Average time per response: {average_time_per_response} seconds")
+    print(f"NUMBER OF COMPLETED TASKS: {len(responses)}")
+
+
+# Run the main function
+asyncio.run(main())
diff --git a/litellm/tests/test_loadtest_router.py b/litellm/tests/test_loadtest_router.py
index fc3a7c5ad..a12a45b51 100644
--- a/litellm/tests/test_loadtest_router.py
+++ b/litellm/tests/test_loadtest_router.py
@@ -9,6 +9,7 @@
 # ) # Adds the parent directory to the system path
 # import asyncio
 # from litellm import Router, Timeout
+# import time
 
 
 # async def call_acompletion(semaphore, router: Router, input_data):
@@ -17,6 +18,7 @@
 #             # Use asyncio.wait_for to set a timeout for the task
 #             response = await router.acompletion(**input_data)
 #             # Handle the response as needed
+#             print(response)
 #             return response
 #         except Timeout:
 #             print(f"Task timed out: {input_data}")
@@ -42,15 +44,6 @@
 #                 "api_version": os.getenv("AZURE_API_VERSION"),
 #             },
 #         },
-#         {
-#             "model_name": "gpt-3.5-turbo",
-#             "litellm_params": {
-#                 "model": "azure/chatgpt-functioncalling",
-#                 "api_key": os.getenv("AZURE_API_KEY"),
-#                 "api_base": os.getenv("AZURE_API_BASE"),
-#                 "api_version": os.getenv("AZURE_API_VERSION"),
-#             },
-#         },
 #     ]
 
 #     router = Router(model_list=model_list, num_retries=3, timeout=10)
@@ -59,9 +52,9 @@
 
 #     # List to hold all task references
 #     tasks = []
-
+#     start_time_all_tasks = time.time()
 #     # Launch 1000 tasks
-#     for _ in range(1000):
+#     for _ in range(500):
 #         task = asyncio.create_task(
 #             call_acompletion(
 #                 semaphore,
@@ -77,6 +70,15 @@
 
 #     # Wait for all tasks to complete
 #     responses = await asyncio.gather(*tasks)
 #     # Process responses as needed
+#     # Record the end time for all tasks
+#     end_time_all_tasks = time.time()
+#     # Calculate the total time for all tasks
+#     total_time_all_tasks = end_time_all_tasks - start_time_all_tasks
+#     print(f"Total time for all tasks: {total_time_all_tasks} seconds")
+
+#     # Calculate the average time per response
+#     average_time_per_response = total_time_all_tasks / len(responses)
+#     print(f"Average time per response: {average_time_per_response} seconds")
 #     print(f"NUMBER OF COMPLETED TASKS: {len(responses)}")
 
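Because every task in the S3-cache variant sends an identical prompt, most of the 500 requests should be served from the cache once the first response is written, so that script's timings are expected to be much lower than the other two. A minimal way to sanity-check the cache is sketched below under the same configuration (illustrative, not part of the diff; `cache_smoke_test` is a hypothetical helper, and inferring a hit from timing is only a heuristic):

```python
# A sketch, not part of this PR: issue the same request twice against the
# S3-cached router; the second call should return noticeably faster if it
# was served from the cache. Assumes the same env vars as the scripts above.
import asyncio
import os
import time

import litellm
from litellm import Router
from litellm.caching import Cache

litellm.cache = Cache(
    type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2"
)

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        }
    ]
)


async def cache_smoke_test():
    messages = [{"role": "user", "content": "cache probe"}]
    for label in ("cold", "warm"):
        start = time.time()
        await router.acompletion(model="gpt-3.5-turbo", messages=messages)
        print(f"{label}: {time.time() - start:.2f}s")


asyncio.run(cache_smoke_test())
```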