mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 19:54:13 +00:00)
(test) usage based routing with fallbacks
parent 4e89be0e19 · commit c20d9299cc
1 changed file with 20 additions and 4 deletions
@@ -716,7 +716,7 @@ def test_usage_based_routing_fallbacks():
     # Constants for TPM and RPM allocation
     AZURE_FAST_TPM = 3
     AZURE_BASIC_TPM = 4
-    OPENAI_TPM = 2000
+    OPENAI_TPM = 400
     ANTHROPIC_TPM = 100000

     def get_azure_params(deployment_name: str):
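For context, the test's model_list (defined just below this hunk, outside the diff) presumably wires each of these constants to one deployment. A minimal sketch of that wiring, assuming litellm's Router deployment schema; the deployment names, the placement of the tpm key, and the get_azure_params arguments are guesses at the shape of code not shown here, not the repository's actual definitions:

# assumes the TPM constants, get_azure_params, and `import os` from the test above
model_list = [
    {
        "model_name": "azure/gpt-4-fast",
        "litellm_params": get_azure_params("gpt-4-fast"),  # hypothetical deployment name
        "tpm": AZURE_FAST_TPM,
    },
    {
        "model_name": "azure/gpt-4-basic",
        "litellm_params": get_azure_params("gpt-4-basic"),  # hypothetical deployment name
        "tpm": AZURE_BASIC_TPM,
    },
    {
        "model_name": "openai-gpt-4",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.environ["OPENAI_API_KEY"]},
        "tpm": OPENAI_TPM,
    },
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {"model": "claude-instant-1.2", "api_key": os.environ["ANTHROPIC_API_KEY"]},
        "tpm": ANTHROPIC_TPM,
    },
]

Lowering OPENAI_TPM from 2000 to 400 is what lets the second half of the test exhaust the OpenAI deployment's budget with only twenty short mocked requests.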
@@ -775,6 +775,7 @@ def test_usage_based_routing_fallbacks():
         model_list=model_list,
         fallbacks=fallbacks_list,
         set_verbose=True,
+        debug_level="DEBUG",
         routing_strategy="usage-based-routing",
         redis_host=os.environ["REDIS_HOST"],
         redis_port=os.environ["REDIS_PORT"],
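The fallbacks_list passed to the Router here is also defined outside the diff. In litellm's Router, fallbacks are a list of single-key dicts mapping a model group to the groups to try next. A sketch consistent with the chain this test asserts (fast, then basic, then openai, then anthropic); the exact entries are an assumption:

fallbacks_list = [
    {"azure/gpt-4-fast": ["azure/gpt-4-basic"]},
    {"azure/gpt-4-basic": ["openai-gpt-4"]},
    {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
]

With routing_strategy="usage-based-routing", the Router tracks per-deployment token usage in Redis (hence the redis_host/redis_port arguments), so a deployment whose TPM budget is spent is skipped and the next entry in the chain is tried.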
@@ -783,17 +784,32 @@ def test_usage_based_routing_fallbacks():
         messages = [
             {"content": "Tell me a joke.", "role": "user"},
         ]

         response = router.completion(
-            model="azure/gpt-4-fast", messages=messages, timeout=5
+            model="azure/gpt-4-fast",
+            messages=messages,
+            timeout=5,
+            mock_response="very nice to meet you",
         )
         print("response: ", response)
         print("response._hidden_params: ", response._hidden_params)

         # in this test, we expect azure/gpt-4-fast to fail, then azure/gpt-4-basic to fail, and then openai-gpt-4 to pass
         # the token count of this message is > AZURE_FAST_TPM and > AZURE_BASIC_TPM
         assert response._hidden_params["custom_llm_provider"] == "openai"

+        # now make 20 mock requests to OpenAI - expect it to fall back to anthropic-claude-instant-1.2
+        for i in range(20):
+            response = router.completion(
+                model="azure/gpt-4-fast",
+                messages=messages,
+                timeout=5,
+                mock_response="very nice to meet you",
+            )
+            print("response: ", response)
+            print("response._hidden_params: ", response._hidden_params)
+            if i == 19:
+                # by the 19th call we should have hit the TPM limit for OpenAI; it should fall back to anthropic-claude-instant-1.2
+                assert response._hidden_params["custom_llm_provider"] == "anthropic"

     except Exception as e:
         pytest.fail(f"An exception occurred {e}")
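The mock_response parameter added in this hunk is what makes the test deterministic and free: litellm.completion accepts mock_response and returns the given string as a canned ModelResponse without calling the provider, while the test relies on the router still counting that traffic against the deployment's TPM budget - which is why twenty mocked calls suffice to exhaust OPENAI_TPM = 400 and trigger the anthropic fallback. A standalone sketch of the parameter (the model name here is arbitrary):

import litellm

# mock_response short-circuits the network call: the returned
# ModelResponse carries exactly this string as its content, so the
# test exercises routing and accounting without real API spend.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    mock_response="very nice to meet you",
)
print(response.choices[0].message.content)  # "very nice to meet you"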