From 6e68cd11253ecb9f29fe0e11d66967a49ed94455 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 29 Dec 2023 13:41:29 +0530 Subject: [PATCH] docs(load_test.md): add litellm load test script to docs --- docs/my-website/docs/load_test.md | 87 +++++++++++++++++++++++++++ docs/my-website/sidebars.js | 1 + litellm/tests/test_loadtest_router.py | 29 ++++++--- 3 files changed, 110 insertions(+), 7 deletions(-) create mode 100644 docs/my-website/docs/load_test.md diff --git a/docs/my-website/docs/load_test.md b/docs/my-website/docs/load_test.md new file mode 100644 index 000000000..176731a96 --- /dev/null +++ b/docs/my-website/docs/load_test.md @@ -0,0 +1,87 @@ +# 🔥 Load Test LiteLLM + +Here is a script to load test LiteLLM + +```python +from openai import AsyncOpenAI, AsyncAzureOpenAI +import random, uuid +import time, asyncio, litellm +# import logging +# logging.basicConfig(level=logging.DEBUG) +#### LITELLM PROXY #### +litellm_client = AsyncOpenAI( + api_key="sk-1234", # [CHANGE THIS] + base_url="http://0.0.0.0:8000" +) + +#### AZURE OPENAI CLIENT #### +client = AsyncAzureOpenAI( + api_key="my-api-key", # [CHANGE THIS] + azure_endpoint="my-api-base", # [CHANGE THIS] + api_version="2023-07-01-preview" +) + + +#### LITELLM ROUTER #### +model_list = [ + { + "model_name": "azure-canada", + "litellm_params": { + "model": "azure/my-azure-deployment-name", # [CHANGE THIS] + "api_key": "my-api-key", # [CHANGE THIS] + "api_base": "my-api-base", # [CHANGE THIS] + "api_version": "2023-07-01-preview" + } + } +] + +router = litellm.Router(model_list=model_list) + +async def openai_completion(): + try: + response = await client.chat.completions.create( + model="gpt-35-turbo", + messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], + stream=True + ) + return response + except Exception as e: + print(e) + return None + + +async def router_completion(): + try: + response = await router.acompletion( + model="azure-canada", # [CHANGE THIS] + messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], + stream=True + ) + return response + except Exception as e: + print(e) + return None + +async def proxy_completion_non_streaming(): + try: + response = await litellm_client.chat.completions.create( + model="sagemaker-models", # [CHANGE THIS] (if you call it something else on your proxy) + messages=[{"role": "user", "content": f"This is a test: {uuid.uuid4()}"}], + ) + return response + except Exception as e: + print(e) + return None + +async def loadtest_fn(): + start = time.time() + n = 500 # Number of concurrent tasks + tasks = [proxy_completion_non_streaming() for _ in range(n)] + chat_completions = await asyncio.gather(*tasks) + successful_completions = [c for c in chat_completions if c is not None] + print(n, time.time() - start, len(successful_completions)) + +# Run the event loop to execute the async function +asyncio.run(loadtest_fn()) + +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index b2cd0fd9d..70cedfaa1 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -124,6 +124,7 @@ const sidebars = { "budget_manager", "secret", "completion/token_usage", + "load_test", { type: 'category', label: 'Tutorials', diff --git a/litellm/tests/test_loadtest_router.py b/litellm/tests/test_loadtest_router.py index 5dc60ca3f..fc3a7c5ad 100644 --- a/litellm/tests/test_loadtest_router.py +++ b/litellm/tests/test_loadtest_router.py @@ -25,29 +25,33 @@ # async def main(): # # Initialize the Router -# model_list= [{ +# model_list = [ +# { # "model_name": "gpt-3.5-turbo", # "litellm_params": { # "model": "gpt-3.5-turbo", # "api_key": os.getenv("OPENAI_API_KEY"), # }, -# }, { +# }, +# { # "model_name": "gpt-3.5-turbo", # "litellm_params": { # "model": "azure/chatgpt-v-2", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), -# "api_version": os.getenv("AZURE_API_VERSION") +# "api_version": os.getenv("AZURE_API_VERSION"), # }, -# }, { +# }, +# { # "model_name": "gpt-3.5-turbo", # "litellm_params": { # "model": "azure/chatgpt-functioncalling", # "api_key": os.getenv("AZURE_API_KEY"), # "api_base": os.getenv("AZURE_API_BASE"), -# "api_version": os.getenv("AZURE_API_VERSION") +# "api_version": os.getenv("AZURE_API_VERSION"), # }, -# }] +# }, +# ] # router = Router(model_list=model_list, num_retries=3, timeout=10) # # Create a semaphore with a capacity of 100 @@ -58,12 +62,23 @@ # # Launch 1000 tasks # for _ in range(1000): -# task = asyncio.create_task(call_acompletion(semaphore, router, {"model": "gpt-3.5-turbo", "messages": [{"role":"user", "content": "Hey, how's it going?"}]})) +# task = asyncio.create_task( +# call_acompletion( +# semaphore, +# router, +# { +# "model": "gpt-3.5-turbo", +# "messages": [{"role": "user", "content": "Hey, how's it going?"}], +# }, +# ) +# ) # tasks.append(task) # # Wait for all tasks to complete # responses = await asyncio.gather(*tasks) # # Process responses as needed # print(f"NUMBER OF COMPLETED TASKS: {len(responses)}") + + # # Run the main function # asyncio.run(main())