Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
fix(main.py): fix streaming_chunk_builder to return usage

parent b1db3a38d7
commit 5a9a3aa89c

6 changed files with 133 additions and 117 deletions
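For context: the helper named streaming_chunk_builder in the commit title is exposed by litellm as litellm.stream_chunk_builder, which reassembles streamed deltas into a single completion-style response; this fix is about the rebuilt response carrying an aggregated usage block. A minimal sketch of the intended behavior, assuming an OPENAI_API_KEY is set (the model name and prompt are illustrative placeholders):

    import litellm

    messages = [{"role": "user", "content": "Hey, how's it going?"}]

    # Stream a completion and collect the raw chunks as they arrive.
    chunks = []
    for chunk in litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True):
        chunks.append(chunk)

    # Rebuild one full response from the streamed chunks; per this fix, the
    # result should also include aggregated token counts under `usage`.
    rebuilt = litellm.stream_chunk_builder(chunks, messages=messages)
    print(rebuilt["usage"])  # prompt_tokens / completion_tokens / total_tokens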
|
@@ -13,78 +13,6 @@ from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv

load_dotenv()


def test_multiple_deployments():
    import concurrent, time

    litellm.set_verbose = False
    futures = {}
    model_list = [
        {  # list of model deployments
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "azure/chatgpt-v-2",
                "api_key": "bad-key",
                "api_version": os.getenv("AZURE_API_VERSION"),
                "api_base": os.getenv("AZURE_API_BASE"),
            },
            "tpm": 240000,
            "rpm": 1800,
        },
        {
            "model_name": "gpt-3.5-turbo",  # openai model name
            "litellm_params": {  # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 1000000,
            "rpm": 9000,
        },
    ]

    router = Router(
        model_list=model_list,
        redis_host=os.getenv("REDIS_HOST"),
        redis_password=os.getenv("REDIS_PASSWORD"),
        redis_port=int(os.getenv("REDIS_PORT")),
        routing_strategy="simple-shuffle",
        set_verbose=False,
        num_retries=1,
    )  # type: ignore
    kwargs = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    }

    results = []

    try:
        for _ in range(3):
            response = router.completion(**kwargs)
            results.append(response)
        router.flush_cache()
    except Exception as e:
        print("FAILED TEST!")
        pytest.fail(f"An error occurred - {str(e)}")

    # start_time = time.time()
    # for _ in range(1000):
    #     future = executor.submit(router.completion, **kwargs)
    #     futures[future] = future

    # # Retrieve the results from the futures
    # while futures:
    #     done, not_done = concurrent.futures.wait(
    #         futures, timeout=10, return_when=concurrent.futures.FIRST_COMPLETED
    #     )
    #     for future in done:
    #         try:
    #             result = future.result()
    #             results.append(result)
    #             futures.pop(future)  # Remove the done future
    #         except Exception as e:
    #             print(f"Exception: {e}; traceback: {traceback.format_exc()}")
    #             futures.pop(future)  # Remove the done future with exception

    #     print(f"Remaining futures: {len(futures)}")

    # end_time = time.time()
    # print(f"ELAPSED TIME: {end_time-start_time}")
    # Check results


# test_multiple_deployments()


def test_exception_raising():
    # this tests if the router raises an exception when invalid params are set
    # in this test both deployments have bad keys - Keep this test. It validates if the router raises the most recent exception