[Fix-Router] Don't cooldown when only 1 deployment exists (#5673)

* fix get model list

* fix test custom callback router

* fix embedding fallback test

* fix router retry policy on AuthErrors

* fix router test

* add prod test: no cooldown when only a single deployment exists

* add test test_single_deployment_no_cooldowns_test_prod_mock_completion_calls
This commit is contained in:
Ishaan Jaff 2024-09-12 19:14:58 -07:00 committed by GitHub
parent 13ba22d6fd
commit 19a06d7842
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 128 additions and 17 deletions

View file

@@ -533,6 +533,7 @@ async def test_async_chat_azure_with_fallbacks():
try:
customHandler_fallbacks = CompletionCustomHandler()
litellm.callbacks = [customHandler_fallbacks]
litellm.set_verbose = True
# with fallbacks
model_list = [
{
@@ -555,7 +556,13 @@ async def test_async_chat_azure_with_fallbacks():
"rpm": 1800,
},
]
router = Router(model_list=model_list, fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}]) # type: ignore
router = Router(
model_list=model_list,
fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
retry_policy=litellm.router.RetryPolicy(
AuthenticationErrorRetries=0,
),
) # type: ignore
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],