diff --git a/litellm/testing.py b/litellm/testing.py
index 93027eccb..b3d786fa8 100644
--- a/litellm/testing.py
+++ b/litellm/testing.py
@@ -27,16 +27,17 @@ def testing_batch_completion(*args, **kwargs):
                 else:
                     kwargs_modified["messages"] = message_list
                 future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
-                completions.append(future)
+                completions.append((future, message_list))
 
     # Retrieve the results and calculate elapsed time for each completion call
-    for future in completions:
+    for completion in completions:
+        future, message_list = completion
         start_time = time.time()
         try:
             result = future.result()
             end_time = time.time()
             elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": future.result(), "response_time": elapsed_time}
+            result_dict = {"status": "succeeded", "response": future.result(), "prompt": message_list, "response_time": elapsed_time}
             results.append(result_dict)
         except Exception as e:
             end_time = time.time()
diff --git a/litellm/tests/test_load_test_model.py b/litellm/tests/test_load_test_model.py
index 49f657c34..8040dabe7 100644
--- a/litellm/tests/test_load_test_model.py
+++ b/litellm/tests/test_load_test_model.py
@@ -4,19 +4,19 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import load_test_model, testing_batch_completion
 
-## Load Test Model
-model="gpt-3.5-turbo"
-result = load_test_model(model=model, num_calls=5)
-print(result)
-print(len(result["results"]))
+# ## Load Test Model
+# model="gpt-3.5-turbo"
+# result = load_test_model(model=model, num_calls=5)
+# print(result)
+# print(len(result["results"]))
 
-## Duration Test Model
-model="gpt-3.5-turbo"
-result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 2 minutes, sending 5 calls every 15s
-print(result)
+# ## Duration Test Model
+# model="gpt-3.5-turbo"
+# result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 15 seconds, sending 5 calls every 15s
+# print(result)
 
 ## Quality Test across Model
 models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}]
-messages = [[{"role": "user", "content": "What is your name?"}]]
+messages = [[{"role": "user", "content": "What is your name?"}], [{"role": "user", "content": "Hey, how's it going?"}]]
 result = testing_batch_completion(models=models, messages=messages)
 print(result)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 851197166..0545d8dc8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.402"
+version = "0.1.403"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
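
Usage note: after this change, each result dict carries the prompt that produced it, so callers can match responses back to inputs when several message lists are fanned out in parallel. A minimal sketch of consuming that output, assuming testing_batch_completion returns the results list built in testing.py (the return shape is not shown in this diff):

    from litellm import testing_batch_completion

    models = ["gpt-3.5-turbo", "claude-instant-1"]
    messages = [
        [{"role": "user", "content": "What is your name?"}],
        [{"role": "user", "content": "Hey, how's it going?"}],
    ]

    # Assumption: the return value is the list of dicts built above, each with
    # "status", "response", "prompt", and "response_time" keys.
    results = testing_batch_completion(models=models, messages=messages)
    for entry in results:
        if entry["status"] == "succeeded":
            # "prompt" now identifies which message list this response belongs to
            print(entry["prompt"], "->", f'{entry["response_time"]:.2f}s')
        else:
            print(entry["prompt"], "failed:", entry["response"])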