From cdec7a414f6376fbde82816e4ac4dece6d4c1e7c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Fri, 10 May 2024 09:58:40 -0700
Subject: [PATCH] test(test_router_fallbacks.py): fix test

---
 litellm/main.py                        |  1 +
 litellm/router.py                      |  1 +
 litellm/tests/test_custom_logger.py    |  3 ++-
 litellm/tests/test_router_fallbacks.py | 20 +++++++++++---------
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 6fd4cdaab..72f5b1dc6 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -662,6 +662,7 @@ def completion(
         "region_name",
         "allowed_model_region",
     ]
+
     default_params = openai_params + litellm_params
     non_default_params = {
         k: v for k, v in kwargs.items() if k not in default_params
diff --git a/litellm/router.py b/litellm/router.py
index 68f49a0a0..39d49a147 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -102,6 +102,7 @@ class Router:
             "usage-based-routing",
             "latency-based-routing",
             "cost-based-routing",
+            "usage-based-routing-v2",
         ] = "simple-shuffle",
         routing_strategy_args: dict = {},  # just for latency-based routing
         semaphore: Optional[asyncio.Semaphore] = None,
diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index 9c2afe5a3..c7df31214 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -437,8 +437,9 @@ async def test_cost_tracking_with_caching():
             max_tokens=40,
             temperature=0.2,
             caching=True,
+            mock_response="Hey, i'm doing well!",
         )
-        await asyncio.sleep(1)  # success callback is async
+        await asyncio.sleep(3)  # success callback is async
         response_cost = customHandler_optional_params.response_cost
         assert response_cost > 0
         response2 = await litellm.acompletion(
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 0e001eeba..0bce9894b 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -754,6 +754,9 @@ async def test_async_fallbacks_max_retries_per_request():
 
 def test_ausage_based_routing_fallbacks():
     try:
+        import litellm
+
+        litellm.set_verbose = False
         # [Prod Test]
         # IT tests Usage Based Routing with fallbacks
         # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
@@ -766,10 +769,10 @@ def test_ausage_based_routing_fallbacks():
         load_dotenv()
 
         # Constants for TPM and RPM allocation
-        AZURE_FAST_RPM = 0
-        AZURE_BASIC_RPM = 0
+        AZURE_FAST_RPM = 1
+        AZURE_BASIC_RPM = 1
         OPENAI_RPM = 0
-        ANTHROPIC_RPM = 2
+        ANTHROPIC_RPM = 10
 
         def get_azure_params(deployment_name: str):
             params = {
@@ -832,9 +835,9 @@ def test_ausage_based_routing_fallbacks():
             fallbacks=fallbacks_list,
             set_verbose=True,
             debug_level="DEBUG",
-            routing_strategy="usage-based-routing",
+            routing_strategy="usage-based-routing-v2",
             redis_host=os.environ["REDIS_HOST"],
-            redis_port=os.environ["REDIS_PORT"],
+            redis_port=int(os.environ["REDIS_PORT"]),
             num_retries=0,
         )
 
@@ -853,17 +856,16 @@ def test_ausage_based_routing_fallbacks():
         # the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
         assert response._hidden_params["model_id"] == "1"
 
-        # now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
-        for i in range(3):
+        for i in range(10):
+            # now make 10 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
             response = router.completion(
                 model="azure/gpt-4-fast",
                 messages=messages,
                 timeout=5,
             )
             print("response: ", response)
             print("response._hidden_params: ", response._hidden_params)
-            if i == 2:
-                # by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
+            if i == 9:
                 assert response._hidden_params["model_id"] == "4"
 
     except Exception as e:
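
A note on the test_custom_logger.py change: litellm runs success callbacks asynchronously, which is why the sleep is bumped from 1s to 3s before the test reads the tracked cost, and why mock_response is added to avoid real API spend (cost is still computed from token counts, so the assert holds). A minimal sketch of the pattern, assuming a hypothetical TrackCostHandler name — the real test's handler is only partially visible in this diff:

import asyncio

import litellm
from litellm.integrations.custom_logger import CustomLogger


class TrackCostHandler(CustomLogger):
    """Hypothetical stand-in for the test's handler; stores the computed cost."""

    def __init__(self):
        self.response_cost = 0

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # litellm passes the computed cost to callbacks via kwargs
        self.response_cost = kwargs.get("response_cost", 0)


async def main():
    handler = TrackCostHandler()
    litellm.callbacks = [handler]
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi"}],
        mock_response="Hey, i'm doing well!",  # no real API call; callbacks still fire
    )
    await asyncio.sleep(3)  # success callback is async; give it time to run
    assert handler.response_cost > 0


asyncio.run(main())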
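And a sketch of the fallback flow the reworked test exercises: with "usage-based-routing-v2", the Router tracks per-deployment usage via the configured Redis, and once a deployment's rpm allocation is exhausted the fallbacks list routes the call to the next deployment. The deployment names, ids, keys, and rpm values below are illustrative placeholders, not the test's actual fixtures:

import os

from litellm import Router

# Two deployments: a tightly rate-limited primary and a roomy fallback.
model_list = [
    {
        "model_name": "azure/gpt-4-fast",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",  # placeholder deployment
            "api_key": os.environ["AZURE_API_KEY"],
            "api_base": os.environ["AZURE_API_BASE"],
            "api_version": os.environ["AZURE_API_VERSION"],
            "rpm": 1,  # low limit so usage-based routing trips quickly
        },
        "model_info": {"id": "1"},
    },
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {
            "model": "claude-instant-1.2",
            "api_key": os.environ["ANTHROPIC_API_KEY"],
            "rpm": 10,
        },
        "model_info": {"id": "4"},
    },
]

router = Router(
    model_list=model_list,
    # when azure/gpt-4-fast is rate-limited, retry on the fallback deployment
    fallbacks=[{"azure/gpt-4-fast": ["anthropic-claude-instant-1.2"]}],
    routing_strategy="usage-based-routing-v2",  # the strategy this patch registers
    redis_host=os.environ["REDIS_HOST"],
    redis_port=int(os.environ["REDIS_PORT"]),
    num_retries=0,
)

# mock_response avoids real API spend while still exercising routing + fallbacks
response = router.completion(
    model="azure/gpt-4-fast",
    messages=[{"role": "user", "content": "hi"}],
    mock_response="hi",
)
print(response._hidden_params["model_id"])  # "4" once the primary's rpm is exhausted

The int(...) cast on REDIS_PORT mirrors the fix in this patch: environment variables are strings, and the Redis client expects an integer port.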