Mirror of https://github.com/BerriAI/litellm.git
(test) usage based routing with fallbacks
This commit is contained in:
parent 4e89be0e19
commit c20d9299cc

1 changed file with 20 additions and 4 deletions
@@ -716,7 +716,7 @@ def test_usage_based_routing_fallbacks():
     # Constants for TPM and RPM allocation
     AZURE_FAST_TPM = 3
     AZURE_BASIC_TPM = 4
-    OPENAI_TPM = 2000
+    OPENAI_TPM = 400
     ANTHROPIC_TPM = 100000

     def get_azure_params(deployment_name: str):
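For context, `get_azure_params` and the model group names come from elsewhere in this test; below is a minimal sketch of how these TPM constants plausibly feed the Router's model_list. The Azure deployment name, the non-Azure litellm_params, and the exact placement of the tpm key are assumptions, not shown in this commit:

# Hypothetical reconstruction of the surrounding test setup -- only the
# constants and the get_azure_params helper appear in this diff; the rest is assumed.
model_list = [
    {
        "model_name": "azure/gpt-4-fast",
        "litellm_params": get_azure_params("chatgpt-v-2"),  # assumed deployment
        "tpm": AZURE_FAST_TPM,   # 3 tokens/min -> fails on the first request
    },
    {
        "model_name": "azure/gpt-4-basic",
        "litellm_params": get_azure_params("chatgpt-v-2"),  # assumed deployment
        "tpm": AZURE_BASIC_TPM,  # 4 tokens/min -> also fails immediately
    },
    {
        "model_name": "openai-gpt-4",
        "litellm_params": {"model": "gpt-3.5-turbo"},  # assumed model
        "tpm": OPENAI_TPM,       # 400 tokens/min -> exhausted after ~20 calls
    },
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {"model": "claude-instant-1.2"},  # assumed model
        "tpm": ANTHROPIC_TPM,    # 100000 tokens/min, effectively unlimited here
    },
]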
@@ -775,6 +775,7 @@ def test_usage_based_routing_fallbacks():
         model_list=model_list,
         fallbacks=fallbacks_list,
         set_verbose=True,
+        debug_level="DEBUG",
         routing_strategy="usage-based-routing",
         redis_host=os.environ["REDIS_HOST"],
         redis_port=os.environ["REDIS_PORT"],
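The fallbacks_list passed to the Router here is defined outside this hunk. Given the fallback chain the test's comments describe (azure/gpt-4-fast, then azure/gpt-4-basic, then openai-gpt-4, then anthropic-claude-instant-1.2), it plausibly has the shape sketched below; the exact structure is an assumption:

# Assumed shape, inferred from the comments in this test: each entry maps a
# model group to the group(s) the router should try when that group fails.
fallbacks_list = [
    {"azure/gpt-4-fast": ["azure/gpt-4-basic"]},
    {"azure/gpt-4-basic": ["openai-gpt-4"]},
    {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
]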
@@ -783,17 +784,32 @@ def test_usage_based_routing_fallbacks():
         messages = [
             {"content": "Tell me a joke.", "role": "user"},
         ]

         response = router.completion(
-            model="azure/gpt-4-fast", messages=messages, timeout=5
+            model="azure/gpt-4-fast",
+            messages=messages,
+            timeout=5,
+            mock_response="very nice to meet you",
         )
         print("response: ", response)
         print("response._hidden_params: ", response._hidden_params)

         # in this test, we expect azure/gpt-4-fast to fail, then azure/gpt-4-basic to fail, and then openai-gpt-4 to pass
         # the token count of this message is > AZURE_FAST_TPM and > AZURE_BASIC_TPM
         assert response._hidden_params["custom_llm_provider"] == "openai"

+        # now make 20 mock requests to OpenAI - expect it to fall back to anthropic-claude-instant-1.2
+        for i in range(20):
+            response = router.completion(
+                model="azure/gpt-4-fast",
+                messages=messages,
+                timeout=5,
+                mock_response="very nice to meet you",
+            )
+            print("response: ", response)
+            print("response._hidden_params: ", response._hidden_params)
+            if i == 19:
+                # by the 19th call we should have hit the TPM limit for OpenAI; it should fall back to anthropic-claude-instant-1.2
+                assert response._hidden_params["custom_llm_provider"] == "anthropic"

     except Exception as e:
         pytest.fail(f"An exception occurred {e}")
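Lowering OPENAI_TPM from 2000 to 400 appears sized so the OpenAI deployment's budget runs out by the end of the 20-call loop. As a rough sanity check, assuming the router books the real token count of the prompt plus the mocked completion against the deployment on each call (this accounting model is an assumption, not confirmed by the diff):

import litellm

# Rough sanity check (not part of the commit): estimate how many tokens each
# mocked call books against the OpenAI deployment's TPM budget.
messages = [{"content": "Tell me a joke.", "role": "user"}]
prompt_tokens = litellm.token_counter(model="gpt-3.5-turbo", messages=messages)
completion_tokens = litellm.token_counter(
    model="gpt-3.5-turbo", text="very nice to meet you"
)
per_call = prompt_tokens + completion_tokens
# If per_call is ~20 tokens, 20 calls consume ~400 tokens -- matching
# OPENAI_TPM -- so the 20th call (i == 19) should overflow to
# anthropic-claude-instant-1.2.
print(f"~{per_call} tokens/call, ~{20 * per_call} tokens over the loop")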