forked from phoenix/litellm-mirror

adding additional ways of doing testing

commit fd6367ca25 (parent d6fb6ea897)
6 changed files with 185 additions and 851 deletions
BIN  dist/litellm-0.1.401-py3-none-any.whl  (vendored normal file; binary not shown)
BIN  dist/litellm-0.1.401.tar.gz  (vendored normal file; binary not shown)
@@ -1,43 +1,75 @@
 import litellm
 import time
 from concurrent.futures import ThreadPoolExecutor
+import traceback
 
-def batch_completion(*args, **kwargs):
-    batch_messages = args[1] if len(args) > 1 else kwargs.get("messages")
-    results = []
-    completions = []
-    exceptions = []
-    times = []
-    with ThreadPoolExecutor() as executor:
-        for message_list in batch_messages:
-            if len(args) > 1:
-                args_modified = list(args)
-                args_modified[1] = message_list
-                future = executor.submit(litellm.completion, *args_modified)
-            else:
-                kwargs_modified = dict(kwargs)
-                kwargs_modified["messages"] = message_list
-                future = executor.submit(litellm.completion, *args, **kwargs_modified)
-            completions.append(future)
-
-    # Retrieve the results and calculate elapsed time for each completion call
-    for future in completions:
-        start_time = time.time()
-        try:
-            result = future.result()
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": future.result(), "response_time": elapsed_time}
-            results.append(result_dict)
-        except Exception as e:
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            result_dict = {"status": "succeeded", "response": e, "response_time": elapsed_time}
-            results.append(result_dict)
-
-    return results
-
-def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, force_timeout: int = None):
+def testing_batch_completion(*args, **kwargs):
+    try:
+        batch_models = args[0] if len(args) > 0 else kwargs.pop("models")  ## expected input format: ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider": "baseten"}, ...]
+        batch_messages = args[1] if len(args) > 1 else kwargs.pop("messages")
+        results = []
+        completions = []
+        exceptions = []
+        times = []
+        with ThreadPoolExecutor() as executor:
+            for model in batch_models:
+                kwargs_modified = dict(kwargs)
+                args_modified = list(args)
+                if len(args) > 0:
+                    args_modified[0] = model["model"] if isinstance(model, dict) else model
+                else:
+                    kwargs_modified["model"] = model["model"] if isinstance(model, dict) and "model" in model else model  # if model is a dict, read its "model" key; else assume it's a plain model string
+                    kwargs_modified["custom_llm_provider"] = model["custom_llm_provider"] if isinstance(model, dict) and "custom_llm_provider" in model else None
+                for message_list in batch_messages:
+                    if len(args) > 1:
+                        args_modified[1] = message_list
+                        future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+                    else:
+                        kwargs_modified["messages"] = message_list
+                        future = executor.submit(litellm.completion, *args_modified, **kwargs_modified)
+                    completions.append(future)
+
+        # Retrieve the results and calculate elapsed time for each completion call
+        for future in completions:
+            start_time = time.time()
+            try:
+                result = future.result()
+                end_time = time.time()
+                elapsed_time = end_time - start_time
+                result_dict = {"status": "succeeded", "response": result, "response_time": elapsed_time}
+                results.append(result_dict)
+            except Exception as e:
+                end_time = time.time()
+                elapsed_time = end_time - start_time
+                result_dict = {"status": "failed", "response": e, "response_time": elapsed_time}
+                results.append(result_dict)
+        return results
+    except:
+        traceback.print_exc()
+
+def duration_test_model(original_function):
+    def wrapper_function(*args, **kwargs):
+        # Pop the duration-test parameters before calling the wrapped function
+        duration = kwargs.pop("duration", None)
+        interval = kwargs.pop("interval", None)
+        results = []
+        if duration and interval:
+            start_time = time.time()
+            end_time = start_time + duration  # stop once the requested duration has elapsed
+            while time.time() < end_time:
+                result = original_function(*args, **kwargs)
+                results.append(result)
+                time.sleep(interval)
+        else:
+            result = original_function(*args, **kwargs)
+            results = result
+        return results
+
+    # Return the wrapper function
+    return wrapper_function
+
+@duration_test_model
+def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base: str = None, prompt: str = None, num_calls: int = None, request_timeout: int = None):
     test_prompt = "Hey, how's it going"
     test_calls = 100
     if prompt:
@@ -47,11 +79,12 @@ def load_test_model(model: str, custom_llm_provider: str = None, custom_api_base
     messages = [[{"role": "user", "content": test_prompt}] for _ in range(test_calls)]
     start_time = time.time()
     try:
-        results = batch_completion(model=model, messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, force_timeout=force_timeout)
+        results = testing_batch_completion(models=[model], messages=messages, custom_llm_provider=custom_llm_provider, custom_api_base=custom_api_base, force_timeout=request_timeout)
         end_time = time.time()
         response_time = end_time - start_time
         return {"total_response_time": response_time, "calls_made": test_calls, "prompt": test_prompt, "results": results}
     except Exception as e:
+        traceback.print_exc()
         end_time = time.time()
         response_time = end_time - start_time
         return {"total_response_time": response_time, "calls_made": test_calls, "prompt": test_prompt, "exception": e}
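Taken together, the new testing_batch_completion fans every model spec out over every message list on a thread pool and returns one result dict per call, while the @duration_test_model decorator pops duration/interval out of kwargs and re-runs the wrapped load test on that interval until the duration elapses. A minimal driving sketch, assuming only the names exported in this diff; the Baseten-style model id "qvv0xeq" is the placeholder from the inline comment above, not a real deployment:

    from litellm import load_test_model, testing_batch_completion

    # Mixed model specs: bare strings use litellm's default provider routing,
    # dicts carry an explicit custom_llm_provider (format from the inline comment).
    models = ["gpt-3.5-turbo", {"model": "qvv0xeq", "custom_llm_provider": "baseten"}]
    messages = [[{"role": "user", "content": "Hey, how's it going?"}]]
    results = testing_batch_completion(models=models, messages=messages)

    # duration/interval never reach load_test_model itself: the decorator pops
    # them, so a call without them falls through to a single run.
    runs = load_test_model(model="gpt-3.5-turbo", num_calls=5, duration=60, interval=15)

Because the wrapper collects one load-test summary per iteration, runs is a list of the usual load_test_model return dicts when duration and interval are given, and a single dict otherwise.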
@@ -2,8 +2,21 @@ import sys, os
 import traceback
 sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
 import litellm
-from litellm import load_test_model
+from litellm import load_test_model, testing_batch_completion
 
+## Load Test Model
 model="gpt-3.5-turbo"
 result = load_test_model(model=model, num_calls=5)
+print(result)
+print(len(result["results"]))
+
+## Duration Test Model
+model="gpt-3.5-turbo"
+result = load_test_model(model=model, num_calls=5, duration=15, interval=15)  # duration-test the model: re-run the load test every 15s until 15s have elapsed
+print(result)
+
+## Quality Test across Models
+models = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "claude-instant-1", {"model": "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781", "custom_llm_provider": "replicate"}]
+messages = [[{"role": "user", "content": "What is your name?"}]]
+result = testing_batch_completion(models=models, messages=messages)
 print(result)
poetry.lock  (generated; 914 lines changed)
File diff suppressed because it is too large.
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.401"
+version = "0.1.402"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"