forked from phoenix/litellm-mirror
(chore) fix testing
This commit is contained in:
parent
50cc4a8595
commit
4c1ef4e270
1 changed files with 99 additions and 99 deletions
|
@ -1,116 +1,116 @@
|
||||||
#### What this tests ####
# This profiles a router call to find where calls are taking the most time.

import sys, os, time, logging
import traceback, asyncio, uuid
import pytest
import cProfile
from pstats import Stats

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from dotenv import load_dotenv
from aiodebug import log_slow_callbacks  # Import the aiodebug utility for logging slow callbacks

load_dotenv()

# Route all debug output to a file so the async load test's console stays quiet.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt='%I:%M:%S %p',
    filename='aiologs.log',  # Name of the log file where logs will be written
    filemode='w'  # 'w' to overwrite the log file on each run, use 'a' to append
)
||||||
# Three Azure deployments sharing the alias "azure-model" so the Router can
# load-balance across them. "os.environ/..." values are resolved by litellm
# from environment variables at call time.
model_list = [
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-turbo",
            "api_key": "os.environ/AZURE_FRANCE_API_KEY",
            "api_base": "https://openai-france-1234.openai.azure.com",
            "rpm": 1440,
        },
    },
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-35-turbo",
            "api_key": "os.environ/AZURE_EUROPE_API_KEY",
            "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
            "rpm": 6,
        },
    },
    {
        "model_name": "azure-model",
        "litellm_params": {
            "model": "azure/gpt-35-turbo",
            "api_key": "os.environ/AZURE_CANADA_API_KEY",
            "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
            "rpm": 6,
        },
    },
]

router = Router(model_list=model_list, set_verbose=False, num_retries=3)
|
||||||
async def router_completion():
    """Fire one chat completion through the router.

    Returns the completion response, or None when the task is cancelled or
    any other error occurs (best-effort: failures are counted, not raised,
    by the load-test callers).
    """
    try:
        # Unique content per request so responses are not cache-collapsed.
        messages = [{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}]
        response = await router.acompletion(model="azure-model", messages=messages)
        return response
    except asyncio.exceptions.CancelledError:
        print("Task was cancelled")
        return None
    except Exception:
        # Fix: the original bound the exception as `e` but never used it.
        return None
||||||
|
|
||||||
async def loadtest_fn(n=1000):
    """Issue n router completions concurrently and print
    (n, elapsed_seconds, number_of_successful_completions)."""
    started = time.time()
    outcomes = await asyncio.gather(*(router_completion() for _ in range(n)))
    successes = [r for r in outcomes if r is not None]
    print(n, time.time() - started, len(successes))
||||||
|
|
||||||
# loop = asyncio.get_event_loop()
|
# # loop = asyncio.get_event_loop()
|
||||||
# loop.set_debug(True)
|
# # loop.set_debug(True)
|
||||||
# log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds
|
# # log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds
|
||||||
|
|
||||||
# # Execute the load testing function within the asyncio event loop
|
# # # Execute the load testing function within the asyncio event loop
|
||||||
# loop.run_until_complete(loadtest_fn())
|
# # loop.run_until_complete(loadtest_fn())
|
||||||
|
|
||||||
### SUSTAINED LOAD TESTS ###
import time, asyncio


async def make_requests(n):
    """Launch n router completions concurrently and return only the
    successful (non-None) responses."""
    tasks = [router_completion() for _ in range(n)]
    print(f"num tasks: {len(tasks)}")
    chat_completions = await asyncio.gather(*tasks)
    successful_completions = [c for c in chat_completions if c is not None]
    print(f"successful_completions: {len(successful_completions)}")
    return successful_completions
||||||
|
|
||||||
async def main():
    """Sustained load test: launch `batches` batches of `request_limit`
    requests, one batch per second, then print
    (total_requests, elapsed_seconds, total_successful_requests).
    """
    request_limit = 1000
    batches = 2  # batches of 1k requests
    start = time.time()
    tasks = []  # list to hold all tasks

    async def request_loop():
        # `tasks` is only mutated in place (append), so `nonlocal` is not
        # needed — the original's `nonlocal tasks` was a no-op and is dropped.
        for _ in range(batches):
            # Make 1,000 requests
            task = asyncio.create_task(make_requests(request_limit))
            tasks.append(task)

            # Introduce a delay to achieve 1,000 requests per second
            await asyncio.sleep(1)

    await request_loop()
    results = await asyncio.gather(*tasks)
    # Fix: the original also assigned `start_time = time.time()` and
    # pre-initialized `total_successful_requests = 0`; both were dead stores.
    total_successful_requests = sum(len(res) for res in results)

    print(request_limit * batches, time.time() - start, total_successful_requests)


asyncio.run(main())
|
Loading…
Add table
Add a link
Reference in a new issue