Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
add batch testing

commit eeeb95a6ac (parent 6f657df905)
2 changed files with 20 additions and 10 deletions
@@ -1208,23 +1208,32 @@ def batch_completion_models_all_responses(*args, **kwargs):
         It sends requests concurrently and collects responses from all models that respond.
     """
     import concurrent.futures
 
+    # ANSI escape codes for colored output
+    GREEN = "\033[92m"
+    RED = "\033[91m"
+    RESET = "\033[0m"
+
     if "model" in kwargs:
         kwargs.pop("model")
     if "models" in kwargs:
         models = kwargs["models"]
         kwargs.pop("models")
-        with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
-            futures = [executor.submit(completion, *args, model=model, **kwargs) for model in models]
-
-            # Collect responses from all models that respond
-            responses = [future.result() for future in concurrent.futures.as_completed(futures) if future.result() is not None]
-
-            return responses
+        responses = []
+        with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
+            for idx, model in enumerate(models):
+                print(f"{GREEN}LiteLLM: Making request to model: {model}{RESET}")
+                future = executor.submit(completion, *args, model=model, **kwargs)
+                if future.result() is not None:
+                    responses.append(future.result())
+                    print(f"{GREEN}LiteLLM: Model {model} returned response{RESET}")
+                else:
+                    print(f"{RED}LiteLLM: Model {model} did not return a response{RESET}")
+
+        return responses
 
-    return []  # If no response is received from any model, return an empty list
 
 
 ### EMBEDDING ENDPOINTS ####################
 @client
 @timeout(  # type: ignore
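For context, batch_completion_models_all_responses fans one request out to every model in the list and returns whatever comes back. A minimal usage sketch, assuming the helper is importable from the package top level and that API keys for the listed providers are set in the environment (the model list, prompt, and max_tokens mirror the updated test below):

from litellm import batch_completion_models_all_responses

# Send the same prompt to several models and collect every non-None response.
responses = batch_completion_models_all_responses(
    models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
    messages=[{"role": "user", "content": "write a poem"}],
    max_tokens=500,
)
print(f"collected {len(responses)} responses")  # the test expects one per model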
@@ -38,10 +38,11 @@ def test_batch_completions_models():
 def test_batch_completion_models_all_responses():
     responses = batch_completion_models_all_responses(
         models=["gpt-3.5-turbo", "claude-instant-1.2", "command-nightly"],
-        messages=[{"role": "user", "content": "Hey, how's it going"}],
-        max_tokens=5
+        messages=[{"role": "user", "content": "write a poem"}],
+        max_tokens=500
     )
     print(responses)
+    assert(len(responses) == 3)
 # test_batch_completion_models_all_responses()
 
 # def test_batch_completions():
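One behavioural note on the rewritten helper: the new loop calls future.result() right after each executor.submit(), so it waits for each model before submitting the next one, whereas the removed list comprehension submitted everything first and gathered results with as_completed. For comparison, a minimal sketch of that submit-all-then-collect pattern using only the standard library; dummy_request is a hypothetical stand-in for the per-model completion call, not code from this commit:

import concurrent.futures
import time

def dummy_request(model: str) -> str:
    # Hypothetical stand-in for a per-model completion request.
    time.sleep(1)
    return f"response from {model}"

models = ["model-a", "model-b", "model-c"]
responses = []

with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
    # Submit every request first so they actually run concurrently...
    futures = {executor.submit(dummy_request, m): m for m in models}
    # ...then collect each result as it finishes.
    for future in concurrent.futures.as_completed(futures):
        result = future.result()
        if result is not None:
            responses.append(result)
            print(f"{futures[future]} returned a response")

assert len(responses) == len(models)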