From 359f542c103ccbab4f9f82fc2a38cf17a5662953 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 21 Nov 2023 20:47:59 -0800
Subject: [PATCH] (test) load test q

---
 litellm/proxy/tests/load_test_q.py | 121 +++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 litellm/proxy/tests/load_test_q.py

diff --git a/litellm/proxy/tests/load_test_q.py b/litellm/proxy/tests/load_test_q.py
new file mode 100644
index 000000000..ae9f00948
--- /dev/null
+++ b/litellm/proxy/tests/load_test_q.py
@@ -0,0 +1,121 @@
+import requests
+import time
+import os
+from dotenv import load_dotenv
+load_dotenv()
+
+
+# Set the base URL as needed
+# base_url = "https://api.litellm.ai"
+# Defaults to the local server; uncomment the line above to use the hosted proxy
+base_url = "http://0.0.0.0:8000"
+
+# Step 1: Add a config to the proxy, generate a temp key
+config = {
+    "model_list": [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo",
+                "api_key": os.environ['OPENAI_API_KEY'],
+            }
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": os.environ['AZURE_API_KEY'],
+                "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
+                "api_version": "2023-07-01-preview"
+            }
+        }
+    ]
+}
+print("STARTING LOAD TEST Q")
+print("AZURE_API_KEY is set:", "AZURE_API_KEY" in os.environ)
+
+response = requests.post(
+    url=f"{base_url}/key/generate",
+    json={
+        "config": config,
+        "duration": "30d"  # defaults to 30d; set it to 30m for a short-lived key
+    },
+    headers={
+        "Authorization": "Bearer sk-hosted-litellm"
+    }
+)
+
+print("\nresponse from generating key", response.text)
+print("\njson response from gen key", response.json())
+
+generated_key = response.json()["key"]
+print("\ngenerated key for proxy", generated_key)
+
+
+# Step 2: Queue 50 requests to the proxy, using your generated_key
+
+import concurrent.futures
+
+def create_job_and_poll(request_num):
+    print(f"Creating a job on the proxy for request {request_num}")
+    job_response = requests.post(
+        url=f"{base_url}/queue/request",
+        json={
+            'model': 'gpt-3.5-turbo',
+            'messages': [
+                {'role': 'system', 'content': 'write a short poem'},
+            ],
+        },
+        headers={
+            "Authorization": f"Bearer {generated_key}"
+        }
+    )
+    print(job_response.status_code)
+    print(job_response.text)
+    print("\nResponse from creating job", job_response.text)
+    job_response = job_response.json()
+    job_id = job_response["id"]
+    polling_url = job_response["url"]
+    polling_url = f"{base_url}{polling_url}"
+    print(f"\nCreated Job {request_num}, Polling Url {polling_url}")
+
+    # Poll each request until it finishes
+    while True:
+        try:
+            print(f"\nPolling URL for request {request_num}", polling_url)
+            polling_response = requests.get(
+                url=polling_url,
+                headers={
+                    "Authorization": f"Bearer {generated_key}"
+                }
+            )
+            print(f"\nResponse from polling url for request {request_num}", polling_response.text)
+            polling_response = polling_response.json()
+            status = polling_response.get("status", None)
+            if status == "finished":
+                llm_response = polling_response["result"]
+                print(f"LLM Response for request {request_num}")
+                print(llm_response)
+                # Append the llm_response to load_test_log.txt
+                try:
+                    with open("load_test_log.txt", "a") as response_file:
+                        response_file.write(
+                            f"{llm_response}\n"
+                        )
+                except Exception as e:
+                    print("GOT EXCEPTION", e)
+                break
+            time.sleep(0.5)
+        except Exception as e:
+            print("got exception when polling", e)
+
+# Number of requests
+num_requests = 50
+
+# Use ThreadPoolExecutor for parallel execution
+with concurrent.futures.ThreadPoolExecutor(max_workers=num_requests) as executor:
+    # Create and poll each request in parallel
+    futures = [executor.submit(create_job_and_poll, i) for i in range(num_requests)]
+
+    # Wait for all futures to complete
+    concurrent.futures.wait(futures)
\ No newline at end of file
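
A note on the polling loop in the script above: `while True` has no deadline, so a job that never reaches `status == "finished"` is polled forever, and the `except` branch retries immediately without sleeping. Below is a minimal sketch of a bounded variant, assuming the same polling URL and the `{"status": ..., "result": ...}` response shape the test already relies on; the `timeout_s` default and the "failed" status string are illustrative assumptions, not confirmed proxy behavior.

    import time
    import requests

    def poll_until_done(polling_url, key, timeout_s=120, interval_s=0.5):
        # Poll a queued job, giving up after timeout_s seconds.
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            try:
                resp = requests.get(
                    url=polling_url,
                    headers={"Authorization": f"Bearer {key}"},
                )
                data = resp.json()
                status = data.get("status")
                if status == "finished":
                    return data["result"]
                if status == "failed":  # assumed failure status, not confirmed
                    raise RuntimeError(f"job failed: {data}")
            except requests.RequestException as e:
                print("transient polling error, retrying:", e)
            time.sleep(interval_s)  # sleep on every iteration, including after errors
        raise TimeoutError(f"no result from {polling_url} within {timeout_s}s")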
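
In the same hedged spirit: `concurrent.futures.wait(futures)` blocks until every worker returns but does not re-raise exceptions from the worker threads, so a request that failed outright is only visible in the printed output. If the load test should fail loudly, one option is to iterate with `as_completed` and call `result()` on each future. A sketch only, reusing `create_job_and_poll` and `num_requests` from load_test_q.py above:

    import concurrent.futures

    # Sketch: create_job_and_poll and num_requests come from load_test_q.py above.
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_requests) as executor:
        futures = [executor.submit(create_job_and_poll, i) for i in range(num_requests)]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # re-raises any exception raised inside the worker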