From cdec7a414f6376fbde82816e4ac4dece6d4c1e7c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 10 May 2024 09:58:40 -0700
Subject: [PATCH] test(test_router_fallbacks.py): fix test

---
 litellm/main.py                        |  1 +
 litellm/router.py                      |  1 +
 litellm/tests/test_custom_logger.py    |  3 ++-
 litellm/tests/test_router_fallbacks.py | 20 +++++++++++---------
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 6fd4cdaab..72f5b1dc6 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -662,6 +662,7 @@ def completion(
         "region_name",
         "allowed_model_region",
     ]
+
     default_params = openai_params + litellm_params
     non_default_params = {
         k: v for k, v in kwargs.items() if k not in default_params
diff --git a/litellm/router.py b/litellm/router.py
index 68f49a0a0..39d49a147 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -102,6 +102,7 @@ class Router:
             "usage-based-routing",
             "latency-based-routing",
             "cost-based-routing",
+            "usage-based-routing-v2",
         ] = "simple-shuffle",
         routing_strategy_args: dict = {},  # just for latency-based routing
         semaphore: Optional[asyncio.Semaphore] = None,
diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index 9c2afe5a3..c7df31214 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -437,8 +437,9 @@ async def test_cost_tracking_with_caching():
             max_tokens=40,
             temperature=0.2,
             caching=True,
+            mock_response="Hey, i'm doing well!",
         )
-        await asyncio.sleep(1)  # success callback is async
+        await asyncio.sleep(3)  # success callback is async
         response_cost = customHandler_optional_params.response_cost
         assert response_cost > 0
         response2 = await litellm.acompletion(
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 0e001eeba..0bce9894b 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -754,6 +754,9 @@ async def test_async_fallbacks_max_retries_per_request():
 
 def test_ausage_based_routing_fallbacks():
     try:
+        import litellm
+
+        litellm.set_verbose = False
         # [Prod Test]
         # IT tests Usage Based Routing with fallbacks
         # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
@@ -766,10 +769,10 @@ def test_ausage_based_routing_fallbacks():
         load_dotenv()
 
         # Constants for TPM and RPM allocation
-        AZURE_FAST_RPM = 0
-        AZURE_BASIC_RPM = 0
+        AZURE_FAST_RPM = 1
+        AZURE_BASIC_RPM = 1
         OPENAI_RPM = 0
-        ANTHROPIC_RPM = 2
+        ANTHROPIC_RPM = 10
 
         def get_azure_params(deployment_name: str):
             params = {
@@ -832,9 +835,9 @@ def test_ausage_based_routing_fallbacks():
             fallbacks=fallbacks_list,
             set_verbose=True,
             debug_level="DEBUG",
-            routing_strategy="usage-based-routing",
+            routing_strategy="usage-based-routing-v2",
             redis_host=os.environ["REDIS_HOST"],
-            redis_port=os.environ["REDIS_PORT"],
+            redis_port=int(os.environ["REDIS_PORT"]),
             num_retries=0,
         )
 
@@ -853,17 +856,16 @@ def test_ausage_based_routing_fallbacks():
         # the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
         assert response._hidden_params["model_id"] == "1"
 
-        # now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
-        for i in range(3):
+        for i in range(10):
+            # now make 10 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
             response = router.completion(
                 model="azure/gpt-4-fast",
                 messages=messages,
                 timeout=5,
             )
             print("response: ", response)
             print("response._hidden_params: ", response._hidden_params)
-            if i == 2:
-                # by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
+            if i == 9:
                 assert response._hidden_params["model_id"] == "4"
 
     except Exception as e:
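
A note on the test_custom_logger.py change: litellm runs success callbacks asynchronously, which is why the sleep is bumped from 1s to 3s before the test reads the tracked cost, and why mock_response is added to avoid real API spend (cost is still computed from token counts, so the assert holds). A minimal sketch of the pattern, assuming a hypothetical TrackCostHandler name — the real test's handler is only partially visible in this diff:

import asyncio

import litellm
from litellm.integrations.custom_logger import CustomLogger


class TrackCostHandler(CustomLogger):
    """Hypothetical stand-in for the test's handler; stores the computed cost."""

    def __init__(self):
        self.response_cost = 0

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # litellm passes the computed cost to callbacks via kwargs
        self.response_cost = kwargs.get("response_cost", 0)


async def main():
    handler = TrackCostHandler()
    litellm.callbacks = [handler]
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi"}],
        mock_response="Hey, i'm doing well!",  # no real API call; callbacks still fire
    )
    await asyncio.sleep(3)  # success callback is async; give it time to run
    assert handler.response_cost > 0


asyncio.run(main())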
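And a sketch of the fallback flow the reworked test exercises: with "usage-based-routing-v2", the Router tracks per-deployment usage via the configured Redis, and once a deployment's rpm allocation is exhausted the fallbacks list routes the call to the next deployment. The deployment names, ids, keys, and rpm values below are illustrative placeholders, not the test's actual fixtures:

import os

from litellm import Router

# Two deployments: a tightly rate-limited primary and a roomy fallback.
model_list = [
    {
        "model_name": "azure/gpt-4-fast",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",  # placeholder deployment
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
            "api_version": os.environ["AZURE_API_VERSION"],
            "rpm": 1,  # low limit so usage-based routing trips quickly
        },
        "model_info": {"id": "1"},
    },
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {
            "model": "claude-instant-1.2",
            "api_key": os.environ["ANTHROPIC_API_KEY"],
            "rpm": 10,
        },
        "model_info": {"id": "4"},
    },
]

router = Router(
    model_list=model_list,
    # when azure/gpt-4-fast is rate-limited, retry on the fallback deployment
    fallbacks=[{"azure/gpt-4-fast": ["anthropic-claude-instant-1.2"]}],
    routing_strategy="usage-based-routing-v2",  # the strategy this patch registers
    redis_host=os.environ["REDIS_HOST"],
    redis_port=int(os.environ["REDIS_PORT"]),
    num_retries=0,
)

# mock_response avoids real API spend while still exercising routing + fallbacks
response = router.completion(
    model="azure/gpt-4-fast",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hi",
)
print(response._hidden_params["model_id"])  # "4" once the primary's rpm is exhausted

The int(...) cast on REDIS_PORT mirrors the fix in this patch: environment variables are strings, and the Redis client expects an integer port.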