diff --git a/litellm/router.py b/litellm/router.py
index 27ac14b9f..e261c1743 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -318,6 +318,7 @@ class Router:
         self.default_deployment = None  # use this to track the users default deployment, when they want to use model = *
         self.default_max_parallel_requests = default_max_parallel_requests
         self.provider_default_deployments: Dict[str, List] = {}
+        self.provider_default_deployment_ids: List[str] = []
 
         if model_list is not None:
             model_list = copy.deepcopy(model_list)
@@ -3178,6 +3179,9 @@ class Router:
         if self._is_cooldown_required(exception_status=exception_status) == False:
             return
 
+        if deployment in self.provider_default_deployment_ids:
+            return
+
         _allowed_fails = self.get_allowed_fails_from_policy(
             exception=original_exception,
         )
@@ -3585,6 +3589,9 @@ class Router:
                     deployment.to_json(exclude_none=True)
                 ]
 
+            if deployment.model_info.id:
+                self.provider_default_deployment_ids.append(deployment.model_info.id)
+
             # Azure GPT-Vision Enhancements, users can pass os.environ/
             data_sources = deployment.litellm_params.get("dataSources", []) or []
 
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index f6b209042..a9a225be7 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1190,12 +1190,6 @@ async def test_router_content_policy_fallbacks(
 @pytest.mark.parametrize("sync_mode", [False, True])
 @pytest.mark.asyncio
 async def test_using_default_fallback(sync_mode):
-    """
-    Tests Client Side Fallbacks
-
-    User can pass "fallbacks": ["gpt-3.5-turbo"] and this should work
-
-    """
     litellm.set_verbose = True
 
     import logging
@@ -1232,3 +1226,41 @@
     except Exception as e:
         print("got exception = ", e)
         assert "No healthy deployment available, passed model=very-bad-model" in str(e)
+
+
+@pytest.mark.parametrize("sync_mode", [False])
+@pytest.mark.asyncio
+async def test_using_default_working_fallback(sync_mode):
+    litellm.set_verbose = True
+
+    import logging
+
+    from litellm._logging import verbose_logger, verbose_router_logger
+
+    verbose_logger.setLevel(logging.DEBUG)
+    verbose_router_logger.setLevel(logging.DEBUG)
+    litellm.default_fallbacks = ["openai/gpt-3.5-turbo"]
+    router = Router(
+        model_list=[
+            {
+                "model_name": "openai/*",
+                "litellm_params": {
+                    "model": "openai/*",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            },
+        ],
+    )
+
+    if sync_mode:
+        response = router.completion(
+            model="openai/foo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    else:
+        response = await router.acompletion(
+            model="openai/foo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    print("got response=", response)
+    assert response is not None