(test) load test queuing

2023-11-21 14:04:39 -08:00 · 2023-11-21 14:04:39 -08:00 · 8093a4fc6f
commit 8093a4fc6f
parent 34886a99cc
1 changed files with 31 additions and 39 deletions
--- a/cookbook/litellm_router/load_test_queuing.py
+++ b/cookbook/litellm_router/load_test_queuing.py
@@ -93,25 +93,25 @@ def make_openai_completion(question):
        # polling the url 
        while True:
            try:
                print("entering the loop to poll")
                print("response", response)
                url = response["url"]
                polling_url = f"http://0.0.0.0:8000{url}"
                print(f"POLLING JOB{polling_url}")
                polling_response = requests.get(polling_url)
                print("\n RESPONSE FROM POLLING JoB", polling_response)
                polling_response = polling_response.json()
                print("\n RESPONSE FROM POLLING JoB", polling_response)
                status = polling_response["status"]
                if status == "finished":
                    llm_response = polling_response["result"]
                    with open("response_log.txt", "a") as log_file:
                        log_file.write(
                            f"Response ID: {llm_response.get('id', 'NA')}\nLLM Response: {llm_response}\nTime: {end_time - start_time:.2f} seconds\n\n"
                        )
                    break
                print(f"POLLING JOB{polling_url}\nSTATUS: {status}, \n Response {polling_response}")
                time.sleep(0.5)
            except Exception as e:
                print("got exception in polling", e)
                break
        # if status == "finished":
        #     print()
        return response
    except Exception as e:
@@ -121,42 +121,34 @@ def make_openai_completion(question):
                f"Question: {question[:100]}\nException: {str(e)}\n\n"
            )
        return None
 make_openai_completion("hello what's the time")
-# # Number of concurrent calls (you can adjust this)
+# Number of concurrent calls (you can adjust this)
-# concurrent_calls = 1
+concurrent_calls = 10
-# # List to store the futures of concurrent calls
+# List to store the futures of concurrent calls
-# futures = []
+futures = []
-# # Make concurrent calls
+# Make concurrent calls
-# with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
+with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
-#     for _ in range(concurrent_calls):
+    for _ in range(concurrent_calls):
-#         random_question = random.choice(questions)
+        random_question = random.choice(questions)
-#         futures.append(executor.submit(make_openai_completion, random_question))
+        futures.append(executor.submit(make_openai_completion, random_question))
-# # Wait for all futures to complete
+# Wait for all futures to complete
-# concurrent.futures.wait(futures)
+concurrent.futures.wait(futures)
-# # Summarize the results
+# Summarize the results
-# successful_calls = 0
+successful_calls = 0
-# failed_calls = 0
+failed_calls = 0
-# for future in futures:
+for future in futures:
-#     if future.result() is not None:
+    if future.result() is not None:
-#         successful_calls += 1
+        successful_calls += 1
-#     else:
+    else:
-#         failed_calls += 1
+        failed_calls += 1
-# print(f"Load test Summary:")
+print(f"Load test Summary:")
-# print(f"Total Requests: {concurrent_calls}")
+print(f"Total Requests: {concurrent_calls}")
-# print(f"Successful Calls: {successful_calls}")
+print(f"Successful Calls: {successful_calls}")
-# print(f"Failed Calls: {failed_calls}")
+print(f"Failed Calls: {failed_calls}")
 # # Display content of the logs
 # with open("request_log.txt", "r") as log_file:
 #     print("\nRequest Log:\n", log_file.read())
 # with open("error_log.txt", "r") as error_log_file:
 #     print("\nError Log:\n", error_log_file.read())