forked from phoenix/litellm-mirror
fix(test_router_fallbacks.py): fix tests
This commit is contained in:
parent 1baad80c7d
commit bc5c9d7da9
2 changed files with 24 additions and 45 deletions
@@ -1512,31 +1512,6 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
# if "No models available" in str(e):
#     timeout = litellm._calculate_retry_after(
#         remaining_retries=remaining_retries,
#         max_retries=num_retries,
#         min_timeout=self.retry_after,
#     )
#     await asyncio.sleep(timeout)
# elif (
#     hasattr(e, "status_code")
#     and hasattr(e, "response")
#     and litellm._should_retry(status_code=e.status_code)
# ):
#     if hasattr(e.response, "headers"):
#         timeout = litellm._calculate_retry_after(
#             remaining_retries=remaining_retries,
#             max_retries=num_retries,
#             response_headers=e.response.headers,
#             min_timeout=self.retry_after,
#         )
#     else:
#         timeout = litellm._calculate_retry_after(
#             remaining_retries=remaining_retries,
#             max_retries=num_retries,
#             min_timeout=self.retry_after,
#         )
_timeout = self._router_should_retry(
    e=original_exception,
    remaining_retries=remaining_retries,
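The hunk above deletes the commented-out inline backoff math and leaves the existing _timeout = self._router_should_retry(...) call as the single place that decides how long to wait. The helper's body is not part of this diff; below is a minimal sketch of the kind of calculation the removed comments described (Retry-After header if present, otherwise capped exponential backoff). The function name, defaults, and cap are assumptions for illustration, not litellm's actual implementation.

import random
from typing import Optional


def sketch_retry_timeout(
    remaining_retries: int,
    max_retries: int,
    min_timeout: float = 0.0,
    response_headers: Optional[dict] = None,
) -> float:
    # hypothetical backoff mirroring the removed commented-out logic
    if response_headers and "retry-after" in response_headers:
        try:
            # honor an explicit Retry-After header from the provider
            return max(float(response_headers["retry-after"]), min_timeout)
        except ValueError:
            pass
    attempts_used = max_retries - remaining_retries
    backoff = min(2 ** attempts_used, 8)  # cap exponential growth at 8s
    jitter = random.uniform(0, 0.1)  # small jitter to avoid synchronized retries
    return max(backoff + jitter, min_timeout)

Presumably the caller then sleeps on whatever self._router_should_retry(...) returns, though that line falls outside this hunk.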
@@ -127,7 +127,7 @@ def test_sync_fallbacks():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4

print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
router.reset()
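The assertion in test_sync_fallbacks moves from 1 ("0 retries, 1 fallback") to 4 because the router in these tests is no longer pinned to num_retries=0 and now walks through 1 init call, 2 retries, and 1 fallback before succeeding. MyCustomHandler's implementation is not shown in this diff; the sketch below is a rough stand-in for a callback that tracks how many deployments were already attempted, assuming litellm's CustomLogger hook interface and that the router records prior attempts under litellm_params["metadata"]["previous_models"] (that key is an assumption here).

from litellm.integrations.custom_logger import CustomLogger


class SketchFallbackHandler(CustomLogger):
    # hypothetical stand-in for the test suite's MyCustomHandler
    def __init__(self):
        self.previous_models = 0

    def log_pre_api_call(self, model, messages, kwargs):
        # assumption: the router lists already-attempted deployments in metadata
        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
        self.previous_models += len(metadata.get("previous_models", []) or [])

Registering it with litellm.callbacks = [SketchFallbackHandler()] would let the test read previous_models back after router.completion(**kwargs), which is what the asserts above check.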
@@ -209,12 +209,13 @@ async def test_async_fallbacks():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
try:
    kwargs["model"] = "azure/gpt-3.5-turbo"
    response = await router.acompletion(**kwargs)
    print(f"customHandler.previous_models: {customHandler.previous_models}")
    await asyncio.sleep(
        0.05
    ) # allow a delay as success_callbacks are on a separate thread
    assert customHandler.previous_models == 1 # 0 retries, 1 fallback
    assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
    router.reset()
except litellm.Timeout as e:
    pass
@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
model_list=model_list,
fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
set_verbose=False,
num_retries=0,
)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
response = router.embedding(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
},
]

router = Router(model_list=model_list, set_verbose=True, num_retries=0)
router = Router(model_list=model_list, set_verbose=True)
kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
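With num_retries=0 gone from the Router constructor, the dynamic-fallback test now exercises the default retry path before its per-request fallback fires. The fallback list itself is not visible in this hunk; it is presumably supplied per call, roughly like the sketch below (the exact fallback mapping is an assumption for illustration):

kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
# dynamic fallbacks: passed on the request instead of on the Router itself
kwargs["fallbacks"] = [{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}]
response = router.completion(**kwargs)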
@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
    pytest.fail(f"An exception occurred - {e}")
@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
    pytest.fail(f"An exception occurred - {e}")
@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -766,10 +766,10 @@ def test_usage_based_routing_fallbacks():
load_dotenv()

# Constants for TPM and RPM allocation
AZURE_FAST_TPM = 3
AZURE_BASIC_TPM = 4
OPENAI_TPM = 400
ANTHROPIC_TPM = 100000
AZURE_FAST_RPM = 3
AZURE_BASIC_RPM = 4
OPENAI_RPM = 10
ANTHROPIC_RPM = 100000

def get_azure_params(deployment_name: str):
    params = {
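Only the signature of get_azure_params and the opening brace of its params dict survive in this hunk. A plausible shape for the helper is sketched below; the environment-variable names are assumptions, not something this diff shows:

import os


def get_azure_params(deployment_name: str):
    # assumed: standard Azure OpenAI settings read from the environment
    params = {
        "model": f"azure/{deployment_name}",
        "api_key": os.environ.get("AZURE_API_KEY"),
        "api_version": os.environ.get("AZURE_API_VERSION"),
        "api_base": os.environ.get("AZURE_API_BASE"),
    }
    return params

get_openai_params and get_anthropic_params, referenced in the model_list below, presumably follow the same pattern for their providers.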
@@ -798,22 +798,26 @@ def test_usage_based_routing_fallbacks():
{
    "model_name": "azure/gpt-4-fast",
    "litellm_params": get_azure_params("chatgpt-v-2"),
    "tpm": AZURE_FAST_TPM,
    "model_info": {"id": 1},
    "rpm": AZURE_FAST_RPM,
},
{
    "model_name": "azure/gpt-4-basic",
    "litellm_params": get_azure_params("chatgpt-v-2"),
    "tpm": AZURE_BASIC_TPM,
    "model_info": {"id": 2},
    "rpm": AZURE_BASIC_RPM,
},
{
    "model_name": "openai-gpt-4",
    "litellm_params": get_openai_params("gpt-3.5-turbo"),
    "tpm": OPENAI_TPM,
    "model_info": {"id": 3},
    "rpm": OPENAI_RPM,
},
{
    "model_name": "anthropic-claude-instant-1.2",
    "litellm_params": get_anthropic_params("claude-instant-1.2"),
    "tpm": ANTHROPIC_TPM,
    "model_info": {"id": 4},
    "rpm": ANTHROPIC_RPM,
},
]
# litellm.set_verbose=True
@@ -844,10 +848,10 @@ def test_usage_based_routing_fallbacks():
mock_response="very nice to meet you",
)
print("response: ", response)
print("response._hidden_params: ", response._hidden_params)
print(f"response._hidden_params: {response._hidden_params}")
# in this test, we expect azure/gpt-4 fast to fail, then azure-gpt-4 basic to fail and then openai-gpt-4 to pass
# the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
assert response._hidden_params["custom_llm_provider"] == "openai"
assert response._hidden_params["model_id"] == "1"

# now make 20 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
for i in range(20):
@@ -861,7 +865,7 @@ def test_usage_based_routing_fallbacks():
print("response._hidden_params: ", response._hidden_params)
if i == 19:
    # by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
    assert response._hidden_params["custom_llm_provider"] == "anthropic"
    assert response._hidden_params["model_id"] == "4"

except Exception as e:
    pytest.fail(f"An exception occurred {e}")
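The Router construction for this usage-based test sits outside the hunks shown above. Based on the constants and model_list entries, it presumably looks something like the sketch below; the routing_strategy string and the fallback chain are assumptions, not part of this diff:

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing",
    # step down the chain as each deployment's TPM/RPM budget is exhausted
    fallbacks=[
        {"azure/gpt-4-fast": ["azure/gpt-4-basic"]},
        {"azure/gpt-4-basic": ["openai-gpt-4"]},
        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
    ],
)

With OPENAI_TPM = 400 and a mocked response per call, the loop above expects the OpenAI deployment's token budget to run out by the 19th request, at which point the router should fall back to anthropic-claude-instant-1.2 (model_info id 4), which is exactly what the final asserts check.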