diff --git a/litellm/testing.py b/litellm/testing.py
index 93027eccb..b3d786fa8 100644
--- a/litellm/testing.py
+++ b/litellm/testing.py
@@ -27,16 +27,17 @@ def testing_batch_completion(*args, **kwargs):
                 else:
                     kwargs_modified["messages"] = message_list
                 future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
-                completions.append(future)
+                completions.append((future, message_list))
 
     # Retrieve the results and calculate elapsed time for each completion call
-    for future in completions:
+    for completion in completions:
+        future, message_list = completion
         start_time = time.time()
         try:
             result = future.result()
             end_time = time.time()
             elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": future.result(), "response_time": elapsed_time}
+            result_dict = {"status": "succeeded", "response": future.result(), "prompt": message_list, "response_time": elapsed_time}
             results.append(result_dict)
         except Exception as e:
             end_time = time.time()
diff --git a/litellm/tests/test_load_test_model.py b/litellm/tests/test_load_test_model.py
index 49f657c34..8040dabe7 100644
--- a/litellm/tests/test_load_test_model.py
+++ b/litellm/tests/test_load_test_model.py
@@ -4,19 +4,19 @@ sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the
 import litellm
 from litellm import load_test_model, testing_batch_completion
 
-## Load Test Model
-model="gpt-3.5-turbo"
-result = load_test_model(model=model, num_calls=5)
-print(result)
-print(len(result["results"]))
+# ## Load Test Model
+# model="gpt-3.5-turbo"
+# result = load_test_model(model=model, num_calls=5)
+# print(result)
+# print(len(result["results"]))
 
-## Duration Test Model
-model="gpt-3.5-turbo"
-result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 2 minutes, sending 5 calls every 15s
-print(result)
+# ## Duration Test Model
+# model="gpt-3.5-turbo"
+# result = load_test_model(model=model, num_calls=5, duration=15, interval=15) # duration test the model for 15 seconds, sending 5 calls every 15s
+# print(result)
 
 ## Quality Test across Model
 models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}]
-messages = [[{"role": "user", "content": "What is your name?"}]]
+messages = [[{"role": "user", "content": "What is your name?"}], [{"role": "user", "content": "Hey, how's it going?"}]]
 result = testing_batch_completion(models=models, messages=messages)
 print(result)
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 851197166..0545d8dc8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.402"
+version = "0.1.403"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
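
Usage note: after this change, each result dict carries the prompt that produced it, so callers can match responses back to inputs when several message lists are fanned out in parallel. A minimal sketch of consuming that output, assuming testing_batch_completion returns the results list built in testing.py (the return shape is not shown in this diff):

    from litellm import testing_batch_completion

    models = ["gpt-3.5-turbo", "claude-instant-1"]
    messages = [
        [{"role": "user", "content": "What is your name?"}],
        [{"role": "user", "content": "Hey, how's it going?"}],
    ]

    # Assumption: the return value is the list of dicts built above, each with
    # "status", "response", "prompt", and "response_time" keys.
    results = testing_batch_completion(models=models, messages=messages)
    for entry in results:
        if entry["status"] == "succeeded":
            # "prompt" now identifies which message list this response belongs to
            print(entry["prompt"], "->", f'{entry["response_time"]:.2f}s')
        else:
            print(entry["prompt"], "failed:", entry["response"])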