(test) router:get_available_deployment

This commit is contained in:
ishaan-jaff 2023-11-29 17:54:41 -08:00
parent 23af756531
commit 305faab542
2 changed files with 288 additions and 52 deletions


@@ -1,5 +1,5 @@
#### What this tests ####
-# This tests calling batch_completions by running 100 messages together
+#This tests litellm router
import sys, os, time
import traceback, asyncio
@@ -299,54 +299,3 @@ def test_aembedding_on_router():
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_aembedding_on_router()
def test_weighted_selection_router():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router()


@@ -0,0 +1,287 @@
# Tests for router.get_available_deployment
# specifically tests if it can pick the correct LLM when rpm/tpm are set
# These are fast tests and make no API calls
import sys, os, time
import traceback, asyncio
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv
load_dotenv()
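All of the tests below follow the same pattern: build a Router from a model_list, call get_available_deployment repeatedly, and count which deployment comes back. A minimal standalone sketch of that pattern, using only calls that appear in the tests (assumes the OPENAI_API_KEY environment variable is set; no API call is made by get_available_deployment):

import os
import litellm
from litellm import Router

# One-deployment sketch of the shared test pattern (values copied from the tests below).
router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0613",
                "api_key": os.getenv("OPENAI_API_KEY"),
                "rpm": 6,
            },
        },
    ],
)
selected = router.get_available_deployment("gpt-3.5-turbo")
model_id = selected["litellm_params"]["model"]
print(litellm.utils.remove_model_id(model_id))  # e.g. gpt-3.5-turbo-0613
router.reset()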
def test_weighted_selection_router():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router()
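The assertion above only holds if selection weight is proportional to each deployment's configured rpm. Below is a hypothetical sketch of such a weighted pick, included for illustration only; it is an assumption about the behaviour under test, not litellm's actual implementation:

import random

def weighted_pick(deployments):
    # Hypothetical: weight each candidate by its configured rpm (default 1 if unset).
    weights = [d["litellm_params"].get("rpm", 1) for d in deployments]
    return random.choices(deployments, weights=weights, k=1)[0]

# With rpm=6 vs rpm=1440, azure/chatgpt-v-2 would be expected roughly
# 1440 / (1440 + 6) ~= 99.6% of the time, comfortably above the 0.89 threshold asserted above.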
def test_weighted_selection_router_tpm():
# this tests if load balancing works based on the provided tpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass tpms as a litellm_param
try:
print("\ntest weighted selection based on TPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"tpm": 5,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"tpm": 90,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router_tpm()
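For this test the expected split follows directly from the two tpm values. A quick check of the arithmetic behind the > 0.89 threshold, assuming selection is proportional to tpm:

# Expected share for azure/chatgpt-v-2 under proportional tpm weighting (assumption).
tpm_openai, tpm_azure = 5, 90
expected_azure_share = tpm_azure / (tpm_openai + tpm_azure)
print(round(expected_azure_share, 3))  # 0.947, which satisfies the > 0.89 assertion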
def test_weighted_selection_router_tpm_as_router_param():
# this tests if load balancing works based on the provided tpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass tpm at the deployment level (outside litellm_params)
try:
print("\ntest weighted selection based on TPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"tpm": 5,
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
"tpm": 90,
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_weighted_selection_router_tpm_as_router_param()
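The only difference from test_weighted_selection_router_tpm is where tpm lives: inside litellm_params versus at the top level of the deployment dict. Stripped of the api_key/api_base fields, the two shapes this commit exercises are:

# tpm inside litellm_params (previous test)
deployment_litellm_param = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {"model": "azure/chatgpt-v-2", "tpm": 90},
}

# tpm at the deployment level, outside litellm_params (this test)
deployment_router_param = {
    "model_name": "gpt-3.5-turbo",
    "litellm_params": {"model": "azure/chatgpt-v-2"},
    "tpm": 90,
}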
def test_weighted_selection_router_rpm_as_router_param():
# this tests if load balancing works based on the provided rpms in the router
# it's a fast test, only tests get_available_deployment
# users can pass rpm/tpm at the deployment level (outside litellm_params)
try:
print("\ntest weighted selection based on RPM\n")
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
},
"rpm": 5,
"tpm": 5,
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
},
"rpm": 90,
"tpm": 90,
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times, it should pick azure/chatgpt-v-2 about 90% of the time
for _ in range(1000):
selected_model = router.get_available_deployment("gpt-3.5-turbo")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'azure/chatgpt-v-2' has about 90% of the total requests
assert selection_counts['azure/chatgpt-v-2'] / total_requests > 0.89, f"Assertion failed: 'azure/chatgpt-v-2' does not have about 90% of the total requests in the weighted load balancer. Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
# test_weighted_selection_router_tpm_as_router_param()
def test_weighted_selection_router_no_rpm_set():
# this tests selection for a model group ("claude-1") that has only one deployment
# it's a fast test, only tests get_available_deployment
# users can pass rpms as a litellm_param
try:
litellm.set_verbose = False
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo-0613",
"api_key": os.getenv("OPENAI_API_KEY"),
"rpm": 6,
},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_base": os.getenv("AZURE_API_BASE"),
"api_version": os.getenv("AZURE_API_VERSION"),
"rpm": 1440,
},
},
{
"model_name": "claude-1",
"litellm_params": {
"model": "bedrock/claude1.2",
"rpm": 1440,
},
}
]
router = Router(
model_list=model_list,
)
selection_counts = defaultdict(int)
# call get_available_deployment 1k times for "claude-1"; it should pick bedrock/claude1.2 every time
for _ in range(1000):
selected_model = router.get_available_deployment("claude-1")
selected_model_id = selected_model["litellm_params"]["model"]
selected_model_name = litellm.utils.remove_model_id(selected_model_id)
selection_counts[selected_model_name] +=1
print(selection_counts)
total_requests = sum(selection_counts.values())
# Assert that 'bedrock/claude1.2' received 100% of the requests
assert selection_counts['bedrock/claude1.2'] / total_requests == 1, f"Assertion failed: Selection counts {selection_counts}"
router.reset()
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
test_weighted_selection_router_no_rpm_set()
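Why the last assertion expects a 100% share rather than ~90%: "claude-1" maps to exactly one deployment, so once candidates are narrowed to the requested model group there is nothing left to balance. A small self-contained illustration of that narrowing step (an assumption about behaviour for illustration, not router internals):

# Trimmed model_list: two groups, only one deployment serves "claude-1".
model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "azure/chatgpt-v-2", "rpm": 1440}},
    {"model_name": "claude-1", "litellm_params": {"model": "bedrock/claude1.2", "rpm": 1440}},
]
candidates = [d for d in model_list if d["model_name"] == "claude-1"]
assert len(candidates) == 1  # a single candidate, so selection is deterministic
assert candidates[0]["litellm_params"]["model"] == "bedrock/claude1.2"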