forked from phoenix/litellm-mirror
test(test_router_fallbacks.py): fix test
This commit is contained in:
parent
40e19a838c
commit
cdec7a414f
4 changed files with 15 additions and 10 deletions
|
@ -662,6 +662,7 @@ def completion(
|
||||||
"region_name",
|
"region_name",
|
||||||
"allowed_model_region",
|
"allowed_model_region",
|
||||||
]
|
]
|
||||||
|
|
||||||
default_params = openai_params + litellm_params
|
default_params = openai_params + litellm_params
|
||||||
non_default_params = {
|
non_default_params = {
|
||||||
k: v for k, v in kwargs.items() if k not in default_params
|
k: v for k, v in kwargs.items() if k not in default_params
|
||||||
|
|
|
@ -102,6 +102,7 @@ class Router:
|
||||||
"usage-based-routing",
|
"usage-based-routing",
|
||||||
"latency-based-routing",
|
"latency-based-routing",
|
||||||
"cost-based-routing",
|
"cost-based-routing",
|
||||||
|
"usage-based-routing-v2",
|
||||||
] = "simple-shuffle",
|
] = "simple-shuffle",
|
||||||
routing_strategy_args: dict = {}, # just for latency-based routing
|
routing_strategy_args: dict = {}, # just for latency-based routing
|
||||||
semaphore: Optional[asyncio.Semaphore] = None,
|
semaphore: Optional[asyncio.Semaphore] = None,
|
||||||
|
|
|
@ -437,8 +437,9 @@ async def test_cost_tracking_with_caching():
|
||||||
max_tokens=40,
|
max_tokens=40,
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
caching=True,
|
caching=True,
|
||||||
|
mock_response="Hey, i'm doing well!",
|
||||||
)
|
)
|
||||||
await asyncio.sleep(1) # success callback is async
|
await asyncio.sleep(3) # success callback is async
|
||||||
response_cost = customHandler_optional_params.response_cost
|
response_cost = customHandler_optional_params.response_cost
|
||||||
assert response_cost > 0
|
assert response_cost > 0
|
||||||
response2 = await litellm.acompletion(
|
response2 = await litellm.acompletion(
|
||||||
|
|
|
@ -754,6 +754,9 @@ async def test_async_fallbacks_max_retries_per_request():
|
||||||
|
|
||||||
def test_ausage_based_routing_fallbacks():
|
def test_ausage_based_routing_fallbacks():
|
||||||
try:
|
try:
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
litellm.set_verbose = False
|
||||||
# [Prod Test]
|
# [Prod Test]
|
||||||
# IT tests Usage Based Routing with fallbacks
|
# IT tests Usage Based Routing with fallbacks
|
||||||
# The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
|
# The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
|
||||||
|
@ -766,10 +769,10 @@ def test_ausage_based_routing_fallbacks():
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Constants for TPM and RPM allocation
|
# Constants for TPM and RPM allocation
|
||||||
AZURE_FAST_RPM = 0
|
AZURE_FAST_RPM = 1
|
||||||
AZURE_BASIC_RPM = 0
|
AZURE_BASIC_RPM = 1
|
||||||
OPENAI_RPM = 0
|
OPENAI_RPM = 0
|
||||||
ANTHROPIC_RPM = 2
|
ANTHROPIC_RPM = 10
|
||||||
|
|
||||||
def get_azure_params(deployment_name: str):
|
def get_azure_params(deployment_name: str):
|
||||||
params = {
|
params = {
|
||||||
|
@ -832,9 +835,9 @@ def test_ausage_based_routing_fallbacks():
|
||||||
fallbacks=fallbacks_list,
|
fallbacks=fallbacks_list,
|
||||||
set_verbose=True,
|
set_verbose=True,
|
||||||
debug_level="DEBUG",
|
debug_level="DEBUG",
|
||||||
routing_strategy="usage-based-routing",
|
routing_strategy="usage-based-routing-v2",
|
||||||
redis_host=os.environ["REDIS_HOST"],
|
redis_host=os.environ["REDIS_HOST"],
|
||||||
redis_port=os.environ["REDIS_PORT"],
|
redis_port=int(os.environ["REDIS_PORT"]),
|
||||||
num_retries=0,
|
num_retries=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -853,8 +856,8 @@ def test_ausage_based_routing_fallbacks():
|
||||||
# the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
|
# the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
|
||||||
assert response._hidden_params["model_id"] == "1"
|
assert response._hidden_params["model_id"] == "1"
|
||||||
|
|
||||||
# now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
|
for i in range(10):
|
||||||
for i in range(3):
|
# now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
|
||||||
response = router.completion(
|
response = router.completion(
|
||||||
model="azure/gpt-4-fast",
|
model="azure/gpt-4-fast",
|
||||||
messages=messages,
|
messages=messages,
|
||||||
|
@ -863,8 +866,7 @@ def test_ausage_based_routing_fallbacks():
|
||||||
)
|
)
|
||||||
print("response: ", response)
|
print("response: ", response)
|
||||||
print("response._hidden_params: ", response._hidden_params)
|
print("response._hidden_params: ", response._hidden_params)
|
||||||
if i == 2:
|
if i == 9:
|
||||||
# by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
|
|
||||||
assert response._hidden_params["model_id"] == "4"
|
assert response._hidden_params["model_id"] == "4"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue