From 0b06a76cf9d264cb9d30be18c261ed900d34c487 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Sat, 24 Aug 2024 09:53:05 -0700
Subject: [PATCH] fix(router.py): don't cooldown on APIConnectionErrors

Fixes issue where a model would be put into cooldown due to API
connection errors
---
 litellm/llms/vertex_ai.py    |  4 ++-
 litellm/router.py            | 31 ++++++++++++++++++++---
 litellm/tests/test_router.py | 48 ++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index c891a86ee..9fc063d1a 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -897,7 +897,9 @@ def completion(
     except Exception as e:
         if isinstance(e, VertexAIError):
             raise e
-        raise VertexAIError(status_code=500, message=str(e))
+        raise litellm.APIConnectionError(
+            message=str(e), llm_provider="vertex_ai", model=model
+        )
 
 
 async def async_completion(
diff --git a/litellm/router.py b/litellm/router.py
index 7a938f5c4..6ca5e4d56 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3081,7 +3081,9 @@ class Router:
                 key=rpm_key, value=request_count, local_only=True
             )  # don't change existing ttl
 
-    def _is_cooldown_required(self, exception_status: Union[str, int]):
+    def _is_cooldown_required(
+        self, exception_status: Union[str, int], exception_str: Optional[str] = None
+    ):
         """
         A function to determine if a cooldown is required based on the exception status.
 
@@ -3092,6 +3094,13 @@ class Router:
             bool: True if a cooldown is required, False otherwise.
         """
         try:
+            ignored_strings = ["APIConnectionError"]
+            if (
+                exception_str is not None
+            ):  # don't cooldown on litellm api connection errors
+                for ignored_string in ignored_strings:
+                    if ignored_string in exception_str:
+                        return False
             if isinstance(exception_status, str):
                 exception_status = int(exception_status)
 
@@ -3177,7 +3186,12 @@ class Router:
         if deployment is None:
             return
 
-        if self._is_cooldown_required(exception_status=exception_status) == False:
+        if (
+            self._is_cooldown_required(
+                exception_status=exception_status, exception_str=str(original_exception)
+            )
+            is False
+        ):
             return
 
         if deployment in self.provider_default_deployment_ids:
@@ -4418,7 +4432,7 @@ class Router:
         - List, if multiple models chosen
         """
         # check if aliases set on litellm model alias map
-        if specific_deployment == True:
+        if specific_deployment is True:
             # users can also specify a specific deployment name. At this point we should check if they are just trying to call a specific deployment
             for deployment in self.model_list:
                 deployment_model = deployment.get("litellm_params").get("model")
@@ -4492,6 +4506,7 @@ class Router:
             raise ValueError(
                 f"No healthy deployment available, passed model={model}. Try again in {self.cooldown_time} seconds"
             )
+
         if litellm.model_alias_map and model in litellm.model_alias_map:
             model = litellm.model_alias_map[
                 model
@@ -4722,7 +4737,15 @@ class Router:
         # filter pre-call checks
         if self.enable_pre_call_checks and messages is not None:
             healthy_deployments = self._pre_call_checks(
-                model=model, healthy_deployments=healthy_deployments, messages=messages
+                model=model,
+                healthy_deployments=healthy_deployments,
+                messages=messages,
+                request_kwargs=request_kwargs,
+            )
+
+        if len(healthy_deployments) == 0:
+            raise ValueError(
+                f"{RouterErrors.no_deployments_available.value}, Try again in {self.cooldown_time} seconds. Passed model={model}. pre-call-checks={self.enable_pre_call_checks}, cooldown_list={self._get_cooldown_deployments()}"
             )
 
         if self.routing_strategy == "least-busy" and self.leastbusy_logger is not None:
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index 12d485dde..0eb014f85 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -2107,3 +2107,51 @@ def test_router_context_window_pre_call_check(model, base_model, llm_provider):
         pass
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {str(e)}")
+
+
+def test_router_cooldown_api_connection_error():
+    # try:
+    #     _ = litellm.completion(
+    #         model="vertex_ai/gemini-1.5-pro",
+    #         messages=[{"role": "admin", "content": "Fail on this!"}],
+    #     )
+    # except litellm.APIConnectionError as e:
+    #     assert (
+    #         Router()._is_cooldown_required(
+    #             exception_status=e.code, exception_str=str(e)
+    #         )
+    #         is False
+    #     )
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gemini-1.5-pro",
+                "litellm_params": {"model": "vertex_ai/gemini-1.5-pro"},
+            }
+        ]
+    )
+
+    try:
+        router.completion(
+            model="gemini-1.5-pro",
+            messages=[{"role": "admin", "content": "Fail on this!"}],
+        )
+    except litellm.APIConnectionError:
+        pass
+
+    try:
+        router.completion(
+            model="gemini-1.5-pro",
+            messages=[{"role": "admin", "content": "Fail on this!"}],
+        )
+    except litellm.APIConnectionError:
+        pass
+
+    try:
+        router.completion(
+            model="gemini-1.5-pro",
+            messages=[{"role": "admin", "content": "Fail on this!"}],
+        )
+    except litellm.APIConnectionError:
+        pass
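
For reference, a minimal sketch (not part of the patch) of what the new guard in Router._is_cooldown_required does. It assumes litellm is importable; the exception string below is hand-written for illustration, so no network call is needed:

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gemini-1.5-pro",
            "litellm_params": {"model": "vertex_ai/gemini-1.5-pro"},
        }
    ]
)

# The guard added above returns False as soon as "APIConnectionError" appears in
# the exception string, so no cooldown is applied regardless of the status code.
assert (
    router._is_cooldown_required(
        exception_status=500,
        exception_str="litellm.APIConnectionError: could not reach vertex_ai",
    )
    is False
)

Since the match is a plain substring check, any exception whose string representation mentions APIConnectionError is exempt from cooldown; other exceptions still go through the existing status-code logic.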