(test) router:get_available_deployment

This commit is contained in:
ishaan-jaff 2023-11-29 17:54:41 -08:00
parent 23af756531
commit 305faab542
2 changed files with 288 additions and 52 deletions


@@ -1,5 +1,5 @@
#### What this tests ####
-# This tests calling batch_completions by running 100 messages together
+#This tests litellm router
import sys, os, time
import traceback, asyncio
@@ -299,54 +299,3 @@ def test_aembedding_on_router():
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_aembedding_on_router()
def test_weighted_selection_router():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router()


@@ -0,0 +1,287 @@
# Tests for router.get_available_deployment
# specifically tests if it can pick the correct LLM when rpm/tpm are set
# These are fast tests and make no API calls
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
load_dotenv()
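All of the tests below follow the same pattern: build a Router from a model_list, call get_available_deployment repeatedly, and count which deployment comes back. A minimal standalone sketch of that pattern, using only calls that appear in the tests (assumes the OPENAI_API_KEY environment variable is set; no API call is made by get_available_deployment):

import os
import litellm
from litellm import Router

# One-deployment sketch of the shared test pattern (values copied from the tests below).
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0613",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "rpm": 6,
            },
        },
    ],
)
selected = router.get_available_deployment("gpt-3.5-turbo")
model_id = selected["litellm_params"]["model"]
print(litellm.utils.remove_model_id(model_id))  # e.g. gpt-3.5-turbo-0613
router.reset()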
def test_weighted_selection_router():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router()
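The assertion above only holds if selection weight is proportional to each deployment's configured rpm. Below is a hypothetical sketch of such a weighted pick, included for illustration only; it is an assumption about the behaviour under test, not litellm's actual implementation:

import random

def weighted_pick(deployments):
    # Hypothetical: weight each candidate by its configured rpm (default 1 if unset).
    weights = [d["litellm_params"].get("rpm", 1) for d in deployments]
    return random.choices(deployments, weights=weights, k=1)[0]

# With rpm=6 vs rpm=1440, azure/chatgpt-v-2 would be expected roughly
# 1440 / (1440 + 6) ~= 99.6% of the time, comfortably above the 0.89 threshold asserted above.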
def test_weighted_selection_router_tpm():
# this tests if load balancing works based on the provided tpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass tpms as a litellm_param
try:
print("\ntest weighted selection based on TPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"tpm": 5,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"tpm": 90,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router_tpm()
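For this test the expected split follows directly from the two tpm values. A quick check of the arithmetic behind the > 0.89 threshold, assuming selection is proportional to tpm:

# Expected share for azure/chatgpt-v-2 under proportional tpm weighting (assumption).
tpm_openai, tpm_azure = 5, 90
expected_azure_share = tpm_azure / (tpm_openai + tpm_azure)
print(round(expected_azure_share, 3))  # 0.947, which satisfies the > 0.89 assertion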
def test_weighted_selection_router_tpm_as_router_param():
# this tests if load balancing works based on the provided tpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass tpm at the deployment level (outside litellm_params)
try:
print("\ntest weighted selection based on TPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"tpm": 5,
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
"tpm": 90,
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_weighted_selection_router_tpm_as_router_param()
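The only difference from test_weighted_selection_router_tpm is where tpm lives: inside litellm_params versus at the top level of the deployment dict. Stripped of the api_key/api_base fields, the two shapes this commit exercises are:

# tpm inside litellm_params (previous test)
deployment_litellm_param = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {"model": "azure/chatgpt-v-2", "tpm": 90},
}

# tpm at the deployment level, outside litellm_params (this test)
deployment_router_param = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {"model": "azure/chatgpt-v-2"},
    "tpm": 90,
}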
def test_weighted_selection_router_rpm_as_router_param():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpm/tpm at the deployment level (outside litellm_params)
try:
print("\ntest weighted selection based on RPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"rpm": 5,
"tpm": 5,
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
"rpm": 90,
"tpm": 90,
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router_tpm_as_router_param()
def test_weighted_selection_router_no_rpm_set():
# this tests selection for a model group ("claude-1") that has only one deployment
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
},
{
"model_name": "claude-1",
"litellm_params": {
"model": "bedrock/claude1.2",
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times for "claude-1"; it should pick bedrock/claude1.2 every time
for _ in range(1000):
selected_model = router.get_available_deployment("claude-1")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'bedrock/claude1.2' received 100% of the requests
assert selection_counts['bedrock/claude1.2'] / total_requests == 1, f"Assertion failed: Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_weighted_selection_router_no_rpm_set()
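Why the last assertion expects a 100% share rather than ~90%: "claude-1" maps to exactly one deployment, so once candidates are narrowed to the requested model group there is nothing left to balance. A small self-contained illustration of that narrowing step (an assumption about behaviour for illustration, not router internals):

# Trimmed model_list: two groups, only one deployment serves "claude-1".
model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "azure/chatgpt-v-2", "rpm": 1440}},
    {"model_name": "claude-1", "litellm_params": {"model": "bedrock/claude1.2", "rpm": 1440}},
]
candidates = [d for d in model_list if d["model_name"] == "claude-1"]
assert len(candidates) == 1  # a single candidate, so selection is deterministic
assert candidates[0]["litellm_params"]["model"] == "bedrock/claude1.2"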