diff --git a/litellm/router.py b/litellm/router.py index 31f8a0be81..ad95a0a9ea 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2154,7 +2154,6 @@ class Router: - there are no fallbacks - there are no healthy deployments in the same model group """ - _num_healthy_deployments = 0 if healthy_deployments is not None and isinstance(healthy_deployments, list): _num_healthy_deployments = len(healthy_deployments) @@ -2167,15 +2166,21 @@ class Router: raise error # Error we should only retry if there are other deployments - if isinstance(error, openai.RateLimitError) or isinstance( - error, openai.AuthenticationError - ): + if isinstance(error, openai.RateLimitError): if ( - _num_healthy_deployments <= 0 - and regular_fallbacks is not None + _num_healthy_deployments <= 0 # if no healthy deployments + and regular_fallbacks is not None # and fallbacks available and len(regular_fallbacks) > 0 ): - raise error + raise error # then raise the error + + if isinstance(error, openai.AuthenticationError): + """ + - if other deployments available -> retry + - else -> raise error + """ + if _num_healthy_deployments <= 0: # if no healthy deployments + raise error # then raise error return True diff --git a/litellm/tests/test_router_retries.py b/litellm/tests/test_router_retries.py index 58e52fe996..ee84ce3ad7 100644 --- a/litellm/tests/test_router_retries.py +++ b/litellm/tests/test_router_retries.py @@ -60,7 +60,7 @@ Test sync + async @pytest.mark.parametrize("sync_mode", [True, False]) -@pytest.mark.parametrize("error_type", ["Authorization Error", "API Error"]) +@pytest.mark.parametrize("error_type", ["API Error", "Authorization Error"]) @pytest.mark.asyncio async def test_router_retries_errors(sync_mode, error_type): """