Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
fix(main.py): fix streaming_chunk_builder to return usage

parent b1db3a38d7
commit 5a9a3aa89c

6 changed files with 133 additions and 117 deletions
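For context: the helper named streaming_chunk_builder in the commit title is exposed by litellm as litellm.stream_chunk_builder, which reassembles streamed deltas into a single completion-style response; this fix is about the rebuilt response carrying an aggregated usage block. A minimal sketch of the intended behavior, assuming an OPENAI_API_KEY is set (the model name and prompt are illustrative placeholders):

    import litellm

    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    # Stream a completion and collect the raw chunks as they arrive.
    chunks = []
    for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
        chunks.append(chunk)

    # Rebuild one full response from the streamed chunks; per this fix, the
    # result should also include aggregated token counts under `usage`.
    rebuilt = litellm.stream_chunk_builder(chunks, messages=messages)
    print(rebuilt["usage"])  # prompt_tokens / completion_tokens / total_tokens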
|
@@ -13,78 +13,6 @@ from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv

load_dotenv()


def test_multiple_deployments():
    import concurrent, time

    litellm.set_verbose = False
    futures = {}
    model_list = [
        {  # list of model deployments
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "tpm": 240000,
            "rpm": 1800,
        },
        {
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 1000000,
            "rpm": 9000,
        },
    ]

    router = Router(
        model_list=model_list,
        redis_host=os.getenv("REDIS_HOST"),
        redis_password=os.getenv("REDIS_PASSWORD"),
        redis_port=int(os.getenv("REDIS_PORT")),
        routing_strategy="simple-shuffle",
        set_verbose=False,
        num_retries=1,
    )  # type: ignore
    kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    }

    results = []

    try:
        for _ in range(3):
            response = router.completion(**kwargs)
            results.append(response)
        router.flush_cache()
    except Exception as e:
        print("FAILED TEST!")
        pytest.fail(f"An error occurred - {str(e)}")

    # start_time = time.time()
    # for _ in range(1000):
    #     future = executor.submit(router.completion, **kwargs)
    #     futures[future] = future

    # # Retrieve the results from the futures
    # while futures:
    #     done, not_done = concurrent.futures.wait(
    #         futures, timeout=10, return_when=concurrent.futures.FIRST_COMPLETED
    #     )
    #     for future in done:
    #         try:
    #             result = future.result()
    #             results.append(result)
    #             futures.pop(future)  # Remove the done future
    #         except Exception as e:
    #             print(f"Exception: {e}; traceback: {traceback.format_exc()}")
    #             futures.pop(future)  # Remove the done future with exception

    #     print(f"Remaining futures: {len(futures)}")

    # end_time = time.time()
    # print(f"ELAPSED TIME: {end_time-start_time}")
    # Check results


# test_multiple_deployments()


def test_exception_raising():
    # this tests if the router raises an exception when invalid params are set
    # in this test both deployments have bad keys - Keep this test. It validates if the router raises the most recent exception