forked from phoenix/litellm-mirror

adding additional ways of doing testing

commit fd6367ca25 (parent d6fb6ea897)
6 changed files with 185 additions and 851 deletions
BIN  dist/litellm-0.1.401-py3-none-any.whl  (vendored normal file; binary not shown)
BIN  dist/litellm-0.1.401.tar.gz  (vendored normal file; binary not shown)
@@ -1,43 +1,75 @@
 import litellm
 import time
 from concurrent.futures import ThreadPoolExecutor
+import traceback
 
-def batch_completion(*args, **kwargs):
-    batch_messages = args[1] if len(args) > 1 else kwargs.get("messages")
-    results = []
-    completions = []
-    exceptions = []
-    times = []
-    with ThreadPoolExecutor() as executor:
-        for message_list in batch_messages:
-            if len(args) > 1:
-                args_modified = list(args)
-                args_modified[1] = message_list
-                future = executor.submit(litellm.completion, *args_modified)
-            else:
-                kwargs_modified = dict(kwargs)
-                kwargs_modified["messages"] = message_list
-                future = executor.submit(litellm.completion, *args, **kwargs_modified)
-            completions.append(future)
-
-    # Retrieve the results and calculate elapsed time for each completion call
-    for future in completions:
-        start_time = time.time()
-        try:
-            result = future.result()
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": future.result(), "response_time": elapsed_time}
-            results.append(result_dict)
-        except Exception as e:
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": e, "response_time": elapsed_time}
-            results.append(result_dict)
-
-    return results
-
-def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, force_timeout: int = None):
+def testing_batch_completion(*args, **kwargs):
+    try:
+        batch_models = args[0] if len(args) > 0 else kwargs.pop("models")  ## expected input format: ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider": "baseten"}, ...]
+        batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
+        results = []
+        completions = []
+        exceptions = []
+        times = []
+        with ThreadPoolExecutor() as executor:
+            for model in batch_models:
+                kwargs_modified = dict(kwargs)
+                args_modified = list(args)
+                if len(args) > 0:
+                    args_modified[0] = model["model"] if isinstance(model, dict) else model
+                else:
+                    kwargs_modified["model"] = model["model"] if isinstance(model, dict) and "model" in model else model  # if model is a dict, read its "model" key; else assume it's a plain model string
+                    kwargs_modified["custom_llm_provider"] = model["custom_llm_provider"] if isinstance(model, dict) and "custom_llm_provider" in model else None
+                for message_list in batch_messages:
+                    if len(args) > 1:
+                        args_modified[1] = message_list
+                        future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+                    else:
+                        kwargs_modified["messages"] = message_list
+                        future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+                    completions.append(future)
+
+        # Retrieve the results and calculate elapsed time for each completion call
+        for future in completions:
+            start_time = time.time()
+            try:
+                result = future.result()
+                end_time = time.time()
+                elapsed_time = end_time - start_time
+                result_dict = {"status": "succeeded", "response": result, "response_time": elapsed_time}
+                results.append(result_dict)
+            except Exception as e:
+                end_time = time.time()
+                elapsed_time = end_time - start_time
+                result_dict = {"status": "failed", "response": e, "response_time": elapsed_time}
+                results.append(result_dict)
+        return results
+    except:
+        traceback.print_exc()
+
+def duration_test_model(original_function):
+    def wrapper_function(*args, **kwargs):
+        # Pop the duration-test parameters before calling the wrapped function
+        duration = kwargs.pop("duration", None)
+        interval = kwargs.pop("interval", None)
+        results = []
+        if duration and interval:
+            start_time = time.time()
+            end_time = start_time + duration  # stop once the requested duration has elapsed
+            while time.time() < end_time:
+                result = original_function(*args, **kwargs)
+                results.append(result)
+                time.sleep(interval)
+        else:
+            result = original_function(*args, **kwargs)
+            results = result
+        return results
+
+    # Return the wrapper function
+    return wrapper_function
+
+@duration_test_model
+def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, request_timeout: int = None):
     test_prompt = "Hey, how's it going"
     test_calls = 100
     if prompt:
@@ -47,11 +79,12 @@ def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base
     messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
     start_time = time.time()
     try:
-        results = batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, force_timeout=force_timeout)
+        results = testing_batch_completion(models=[model], messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, force_timeout=request_timeout)
         end_time = time.time()
         response_time = end_time - start_time
         return {"total_response_time": response_time, "calls_made": test_calls, "prompt": test_prompt, "results": results}
     except Exception as e:
+        traceback.print_exc()
         end_time = time.time()
         response_time = end_time - start_time
         return {"total_response_time": response_time, "calls_made": test_calls, "prompt": test_prompt, "exception": e}
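Taken together, the new testing_batch_completion fans every model spec out over every message list on a thread pool and returns one result dict per call, while the @duration_test_model decorator pops duration/interval out of kwargs and re-runs the wrapped load test on that interval until the duration elapses. A minimal driving sketch, assuming only the names exported in this diff; the Baseten-style model id "qvv0xeq" is the placeholder from the inline comment above, not a real deployment:

    from litellm import load_test_model, testing_batch_completion

    # Mixed model specs: bare strings use litellm's default provider routing,
    # dicts carry an explicit custom_llm_provider (format from the inline comment).
    models = ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider": "baseten"}]
    messages = [[{"role": "user", "content": "Hey, how's it going?"}]]
    results = testing_batch_completion(models=models, messages=messages)

    # duration/interval never reach load_test_model itself: the decorator pops
    # them, so a call without them falls through to a single run.
    runs = load_test_model(model="gpt-3.5-turbo", num_calls=5, duration=60, interval=15)

Because the wrapper collects one load-test summary per iteration, runs is a list of the usual load_test_model return dicts when duration and interval are given, and a single dict otherwise.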
@@ -2,8 +2,21 @@ import sys, os
 import traceback
 sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
 import litellm
-from litellm import load_test_model
+from litellm import load_test_model, testing_batch_completion
 
+## Load Test Model
 model="gpt-3.5-turbo"
 result = load_test_model(model=model, num_calls=5)
+print(result)
+print(len(result["results"]))
+
+## Duration Test Model
+model="gpt-3.5-turbo"
+result = load_test_model(model=model, num_calls=5, duration=15, interval=15)  # duration-test the model: re-run the load test every 15s until 15s have elapsed
+print(result)
+
+## Quality Test across Models
+models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}]
+messages = [[{"role": "user", "content": "What is your name?"}]]
+result = testing_batch_completion(models=models, messages=messages)
 print(result)
poetry.lock  (generated; 914 lines changed)
File diff suppressed because it is too large.
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.401"
+version = "0.1.402"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"