(chore) fix testing

This commit is contained in:
ishaan-jaff 2023-11-29 20:05:10 -08:00
parent 50cc4a8595
commit 4c1ef4e270

View file

@ -1,116 +1,116 @@
#### What this tests #### # #### What this tests ####
# This profiles a router call to find where calls are taking the most time. # # This profiles a router call to find where calls are taking the most time.
import sys, os, time, logging # import sys, os, time, logging
import traceback, asyncio, uuid # import traceback, asyncio, uuid
import pytest # import pytest
import cProfile # import cProfile
from pstats import Stats # from pstats import Stats
sys.path.insert( # sys.path.insert(
0, os.path.abspath("../..") # 0, os.path.abspath("../..")
) # Adds the parent directory to the system path # ) # Adds the parent directory to the system path
import litellm # import litellm
from litellm import Router # from litellm import Router
from concurrent.futures import ThreadPoolExecutor # from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv # from dotenv import load_dotenv
from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks # from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks
# Load API keys from the local .env file, then route DEBUG-level logs
# to aiologs.log so slow-callback traces can be inspected after a run.
load_dotenv()

logging.basicConfig(
    filename='aiologs.log',  # log file that receives all records
    filemode='w',            # overwrite on each run; switch to 'a' to append
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%I:%M:%S %p',
)
# Three Azure deployments share the alias "azure-model" so the router can
# load-balance across regions; "rpm" caps requests-per-minute per deployment.
model_list = [
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-turbo",
            "api_key": "os.environ/AZURE_FRANCE_API_KEY",
            "api_base": "https://openai-france-1234.openai.azure.com",
            "rpm": 1440,
        },
    },
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-35-turbo",
            "api_key": "os.environ/AZURE_EUROPE_API_KEY",
            "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
            "rpm": 6,
        },
    },
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-35-turbo",
            "api_key": "os.environ/AZURE_CANADA_API_KEY",
            "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
            "rpm": 6,
        },
    },
]
router = Router(model_list=model_list, set_verbose=False, num_retries=3) # router = Router(model_list=model_list, set_verbose=False, num_retries=3)
async def router_completion():
    """Issue one chat completion through the router.

    Returns the completion response, or None when the task is cancelled
    or the call fails for any other reason (failures are counted, not
    raised, by the load-test drivers).
    """
    try:
        prompt = [{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}]
        return await router.acompletion(model="azure-model", messages=prompt)
    except asyncio.exceptions.CancelledError:
        print("Task was cancelled")
        return None
    except Exception:
        # Best-effort: any other failure simply counts as an unsuccessful call.
        return None
async def loadtest_fn(n=1000):
    """Fire *n* concurrent router completions and print
    `n, elapsed_seconds, successful_count`.
    """
    began = time.time()
    responses = await asyncio.gather(*(router_completion() for _ in range(n)))
    succeeded = [r for r in responses if r is not None]
    print(n, time.time() - began, len(succeeded))
# loop = asyncio.get_event_loop() # # loop = asyncio.get_event_loop()
# loop.set_debug(True) # # loop.set_debug(True)
# log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds # # log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds
# # Execute the load testing function within the asyncio event loop # # # Execute the load testing function within the asyncio event loop
# loop.run_until_complete(loadtest_fn()) # # loop.run_until_complete(loadtest_fn())
### SUSTAINED LOAD TESTS ### # ### SUSTAINED LOAD TESTS ###
import time, asyncio # import time, asyncio
async def make_requests(n):
    """Launch *n* concurrent router completions and return only the
    non-None (successful) responses.
    """
    pending = [router_completion() for _ in range(n)]
    print(f"num tasks: {len(pending)}")
    responses = await asyncio.gather(*pending)
    succeeded = [r for r in responses if r is not None]
    print(f"successful_completions: {len(succeeded)}")
    return succeeded
async def main():
    """Sustained load test: launch `batches` waves of `request_limit`
    concurrent requests (one wave per second), then print
    `total_requests, elapsed_seconds, total_successful_requests`.

    Fixes over the original: removed the unused `start_time` variable,
    the dead `total_successful_requests = 0` initialization (it was
    unconditionally reassigned before use), and the unnecessary
    `nonlocal tasks` declaration (`tasks` is only mutated via append,
    never rebound, so the closure reads it fine without `nonlocal`).
    """
    request_limit = 1000  # requests per batch
    batches = 2  # batches of 1k requests
    start = time.time()
    tasks = []  # one asyncio.Task per batch

    async def request_loop():
        for _ in range(batches):
            # Fire a full batch of `request_limit` requests...
            tasks.append(asyncio.create_task(make_requests(request_limit)))
            # ...then pace batches to roughly 1,000 requests per second.
            await asyncio.sleep(1)

    await request_loop()
    results = await asyncio.gather(*tasks)
    total_successful_requests = sum(len(res) for res in results)
    print(request_limit * batches, time.time() - start, total_successful_requests)
asyncio.run(main()) # asyncio.run(main())