From 4c1ef4e27035e2a6ecf2894211d23f81bfe56b6b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Wed, 29 Nov 2023 20:05:10 -0800 Subject: [PATCH] (chore) fix testing --- litellm/tests/test_profiling_router.py | 198 ++++++++++++------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/litellm/tests/test_profiling_router.py b/litellm/tests/test_profiling_router.py index d922b816b..e3b291db7 100644 --- a/litellm/tests/test_profiling_router.py +++ b/litellm/tests/test_profiling_router.py @@ -1,116 +1,116 @@ -#### What this tests #### -# This profiles a router call to find where calls are taking the most time. +# #### What this tests #### +# # This profiles a router call to find where calls are taking the most time. -import sys, os, time, logging -import traceback, asyncio, uuid -import pytest -import cProfile -from pstats import Stats -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import litellm -from litellm import Router -from concurrent.futures import ThreadPoolExecutor -from dotenv import load_dotenv -from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks +# import sys, os, time, logging +# import traceback, asyncio, uuid +# import pytest +# import cProfile +# from pstats import Stats +# sys.path.insert( +# 0, os.path.abspath("../..") +# ) # Adds the parent directory to the system path +# import litellm +# from litellm import Router +# from concurrent.futures import ThreadPoolExecutor +# from dotenv import load_dotenv +# from aiodebug import log_slow_callbacks # Import the aiodebug utility for logging slow callbacks -load_dotenv() +# load_dotenv() -logging.basicConfig( - level=logging.DEBUG, - format='%(asctime)s %(levelname)s: %(message)s', - datefmt='%I:%M:%S %p', - filename='aiologs.log', # Name of the log file where logs will be written - filemode='w' # 'w' to overwrite the log file on each run, use 'a' to append -) +# logging.basicConfig( +# level=logging.DEBUG, +# format='%(asctime)s %(levelname)s: %(message)s', +# datefmt='%I:%M:%S %p', +# filename='aiologs.log', # Name of the log file where logs will be written +# filemode='w' # 'w' to overwrite the log file on each run, use 'a' to append +# ) -model_list = [{ - "model_name": "azure-model", - "litellm_params": { - "model": "azure/gpt-turbo", - "api_key": "os.environ/AZURE_FRANCE_API_KEY", - "api_base": "https://openai-france-1234.openai.azure.com", - "rpm": 1440, - } -}, { - "model_name": "azure-model", - "litellm_params": { - "model": "azure/gpt-35-turbo", - "api_key": "os.environ/AZURE_EUROPE_API_KEY", - "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com", - "rpm": 6 - } -}, { - "model_name": "azure-model", - "litellm_params": { - "model": "azure/gpt-35-turbo", - "api_key": "os.environ/AZURE_CANADA_API_KEY", - "api_base": "https://my-endpoint-canada-berri992.openai.azure.com", - "rpm": 6 - } -}] +# model_list = [{ +# "model_name": "azure-model", +# "litellm_params": { +# "model": "azure/gpt-turbo", +# "api_key": "os.environ/AZURE_FRANCE_API_KEY", +# "api_base": "https://openai-france-1234.openai.azure.com", +# "rpm": 1440, +# } +# }, { +# "model_name": "azure-model", +# "litellm_params": { +# "model": "azure/gpt-35-turbo", +# "api_key": "os.environ/AZURE_EUROPE_API_KEY", +# "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com", +# "rpm": 6 +# } +# }, { +# "model_name": "azure-model", +# "litellm_params": { +# "model": "azure/gpt-35-turbo", +# "api_key": "os.environ/AZURE_CANADA_API_KEY", +# "api_base": "https://my-endpoint-canada-berri992.openai.azure.com", +# "rpm": 6 +# } +# }] -router = Router(model_list=model_list, set_verbose=False, num_retries=3) +# router = Router(model_list=model_list, set_verbose=False, num_retries=3) -async def router_completion(): - try: - messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}] - response = await router.acompletion(model="azure-model", messages=messages) - return response - except asyncio.exceptions.CancelledError: - print("Task was cancelled") - return None - except Exception as e: - return None +# async def router_completion(): +# try: +# messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}] +# response = await router.acompletion(model="azure-model", messages=messages) +# return response +# except asyncio.exceptions.CancelledError: +# print("Task was cancelled") +# return None +# except Exception as e: +# return None -async def loadtest_fn(n = 1000): - start = time.time() - tasks = [router_completion() for _ in range(n)] - chat_completions = await asyncio.gather(*tasks) - successful_completions = [c for c in chat_completions if c is not None] - print(n, time.time() - start, len(successful_completions)) +# async def loadtest_fn(n = 1000): +# start = time.time() +# tasks = [router_completion() for _ in range(n)] +# chat_completions = await asyncio.gather(*tasks) +# successful_completions = [c for c in chat_completions if c is not None] +# print(n, time.time() - start, len(successful_completions)) -# loop = asyncio.get_event_loop() -# loop.set_debug(True) -# log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds +# # loop = asyncio.get_event_loop() +# # loop.set_debug(True) +# # log_slow_callbacks.enable(0.05) # Log callbacks slower than 0.05 seconds -# # Excute the load testing function within the asyncio event loop -# loop.run_until_complete(loadtest_fn()) +# # # Excute the load testing function within the asyncio event loop +# # loop.run_until_complete(loadtest_fn()) -### SUSTAINED LOAD TESTS ### -import time, asyncio -async def make_requests(n): - tasks = [router_completion() for _ in range(n)] - print(f"num tasks: {len(tasks)}") - chat_completions = await asyncio.gather(*tasks) - successful_completions = [c for c in chat_completions if c is not None] - print(f"successful_completions: {len(successful_completions)}") - return successful_completions +# ### SUSTAINED LOAD TESTS ### +# import time, asyncio +# async def make_requests(n): +# tasks = [router_completion() for _ in range(n)] +# print(f"num tasks: {len(tasks)}") +# chat_completions = await asyncio.gather(*tasks) +# successful_completions = [c for c in chat_completions if c is not None] +# print(f"successful_completions: {len(successful_completions)}") +# return successful_completions -async def main(): - start_time = time.time() - total_successful_requests = 0 - request_limit = 1000 - batches = 2 # batches of 1k requests - start = time.time() - tasks = [] # list to hold all tasks +# async def main(): +# start_time = time.time() +# total_successful_requests = 0 +# request_limit = 1000 +# batches = 2 # batches of 1k requests +# start = time.time() +# tasks = [] # list to hold all tasks - async def request_loop(): - nonlocal tasks - for _ in range(batches): - # Make 1,000 requests - task = asyncio.create_task(make_requests(request_limit)) - tasks.append(task) +# async def request_loop(): +# nonlocal tasks +# for _ in range(batches): +# # Make 1,000 requests +# task = asyncio.create_task(make_requests(request_limit)) +# tasks.append(task) - # Introduce a delay to achieve 1,000 requests per second - await asyncio.sleep(1) +# # Introduce a delay to achieve 1,000 requests per second +# await asyncio.sleep(1) - await request_loop() - results = await asyncio.gather(*tasks) - total_successful_requests = sum(len(res) for res in results) +# await request_loop() +# results = await asyncio.gather(*tasks) +# total_successful_requests = sum(len(res) for res in results) - print(request_limit*batches, time.time() - start, total_successful_requests) +# print(request_limit*batches, time.time() - start, total_successful_requests) -asyncio.run(main()) \ No newline at end of file +# asyncio.run(main()) \ No newline at end of file