diff --git a/litellm/constants.py b/litellm/constants.py
index fc5f3d0448..e72d5facec 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -2,11 +2,16 @@ ROUTER_MAX_FALLBACKS = 5
 DEFAULT_BATCH_SIZE = 512
 DEFAULT_FLUSH_INTERVAL_SECONDS = 5
 DEFAULT_MAX_RETRIES = 2
+DEFAULT_FAILURE_THRESHOLD_PERCENT = (
+    0.5  # default: cooldown a deployment if 50% of requests fail in a given minute
+)
+DEFAULT_COOLDOWN_TIME_SECONDS = 5
 DEFAULT_REPLICATE_POLLING_RETRIES = 5
 DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
 DEFAULT_IMAGE_TOKEN_COUNT = 250
 DEFAULT_IMAGE_WIDTH = 300
 DEFAULT_IMAGE_HEIGHT = 300
+SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
 LITELLM_CHAT_PROVIDERS = [
     "openai",
     "openai_like",
diff --git a/litellm/router.py b/litellm/router.py
index cb22ac6d67..1747672bbb 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -3617,66 +3617,6 @@ class Router:
 
         return request_count
 
-    def _is_cooldown_required(
-        self,
-        model_id: str,
-        exception_status: Union[str, int],
-        exception_str: Optional[str] = None,
-    ) -> bool:
-        """
-        A function to determine if a cooldown is required based on the exception status.
-
-        Parameters:
-            model_id (str) The id of the model in the model list
-            exception_status (Union[str, int]): The status of the exception.
-
-        Returns:
-            bool: True if a cooldown is required, False otherwise.
-        """
-        ## BASE CASE - single deployment
-        model_group = self.get_model_group(id=model_id)
-        if model_group is not None and len(model_group) == 1:
-            return False
-
-        try:
-            ignored_strings = ["APIConnectionError"]
-            if (
-                exception_str is not None
-            ):  # don't cooldown on litellm api connection errors errors
-                for ignored_string in ignored_strings:
-                    if ignored_string in exception_str:
-                        return False
-
-            if isinstance(exception_status, str):
-                exception_status = int(exception_status)
-
-            if exception_status >= 400 and exception_status < 500:
-                if exception_status == 429:
-                    # Cool down 429 Rate Limit Errors
-                    return True
-
-                elif exception_status == 401:
-                    # Cool down 401 Auth Errors
-                    return True
-
-                elif exception_status == 408:
-                    return True
-
-                elif exception_status == 404:
-                    return True
-
-                else:
-                    # Do NOT cool down all other 4XX Errors
-                    return False
-
-            else:
-                # should cool down for all other errors
-                return True
-
-        except Exception:
-            # Catch all - if any exceptions default to cooling down
-            return True
-
     def _has_default_fallbacks(self) -> bool:
         if self.fallbacks is None:
             return False
diff --git a/litellm/router_utils/cooldown_handlers.py b/litellm/router_utils/cooldown_handlers.py
index 1e1c58a771..8f5c3895a6 100644
--- a/litellm/router_utils/cooldown_handlers.py
+++ b/litellm/router_utils/cooldown_handlers.py
@@ -11,6 +11,11 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import litellm
 from litellm._logging import verbose_router_logger
+from litellm.constants import (
+    DEFAULT_COOLDOWN_TIME_SECONDS,
+    DEFAULT_FAILURE_THRESHOLD_PERCENT,
+    SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD,
+)
 from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 
 from .router_callbacks.track_deployment_metrics import (
@@ -28,10 +33,62 @@ if TYPE_CHECKING:
 else:
     LitellmRouter = Any
     Span = Any
-DEFAULT_FAILURE_THRESHOLD_PERCENT = (
-    0.5  # default cooldown a deployment if 50% of requests fail in a given minute
-)
-DEFAULT_COOLDOWN_TIME_SECONDS = 5
+
+
+def _is_cooldown_required(
+    litellm_router_instance: LitellmRouter,
+    model_id: str,
+    exception_status: Union[str, int],
+    exception_str: Optional[str] = None,
+) -> bool:
+    """
+    A function to determine if a cooldown is required based on the exception status.
+
+    Parameters:
+        model_id (str): The id of the model in the model list
+        exception_status (Union[str, int]): The status of the exception.
+
+    Returns:
+        bool: True if a cooldown is required, False otherwise.
+    """
+    try:
+        ignored_strings = ["APIConnectionError"]
+        if (
+            exception_str is not None
+        ):  # don't cooldown on litellm api connection errors
+            for ignored_string in ignored_strings:
+                if ignored_string in exception_str:
+                    return False
+
+        if isinstance(exception_status, str):
+            exception_status = int(exception_status)
+
+        if exception_status >= 400 and exception_status < 500:
+            if exception_status == 429:
+                # Cool down 429 Rate Limit Errors
+                return True
+
+            elif exception_status == 401:
+                # Cool down 401 Auth Errors
+                return True
+
+            elif exception_status == 408:
+                return True
+
+            elif exception_status == 404:
+                return True
+
+            else:
+                # Do NOT cool down all other 4XX Errors
+                return False
+
+        else:
+            # should cool down for all other errors
+            return True
+
+    except Exception:
+        # Catch all - if any exceptions default to cooling down
+        return True
 
 
 def _should_run_cooldown_logic(
@@ -51,13 +108,20 @@
     - deployment is in litellm_router_instance.provider_default_deployment_ids
     - exception_status is not one that should be immediately retried (e.g. 401)
     """
+    if (
+        deployment is None
+        or litellm_router_instance.get_model_group(id=deployment) is None
+    ):
+        return False
+
     if litellm_router_instance.disable_cooldowns:
         return False
 
     if deployment is None:
         return False
 
-    if not litellm_router_instance._is_cooldown_required(
+    if not _is_cooldown_required(
+        litellm_router_instance=litellm_router_instance,
         model_id=deployment,
         exception_status=exception_status,
         exception_str=str(original_exception),
@@ -94,6 +158,11 @@
 
     - v1 logic (Legacy): if allowed fails or allowed fail policy set, coolsdown if num fails in this minute > allowed fails
     """
+    ## BASE CASE - single deployment
+    model_group = litellm_router_instance.get_model_group(id=deployment)
+    is_single_deployment_model_group = False
+    if model_group is not None and len(model_group) == 1:
+        is_single_deployment_model_group = True
     if (
         litellm_router_instance.allowed_fails_policy is None
         and _is_allowed_fails_set_on_router(
@@ -121,14 +190,21 @@
             num_successes_this_minute,
             num_fails_this_minute,
         )
+
         exception_status_int = cast_exception_status_to_int(exception_status)
-        if exception_status_int == 429:
+        if exception_status_int == 429 and not is_single_deployment_model_group:
             return True
         elif (
-            total_requests_this_minute == 1
-        ):  # if the 1st request fails it's not guaranteed that the deployment should be cooled down
-            return False
-        elif percent_fails > DEFAULT_FAILURE_THRESHOLD_PERCENT:
+            percent_fails == 1.0
+            and total_requests_this_minute
+            >= SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD
+        ):
+            # Cooldown if all requests failed and we have reasonable traffic
+            return True
+        elif (
+            percent_fails > DEFAULT_FAILURE_THRESHOLD_PERCENT
+            and not is_single_deployment_model_group  # by default we should avoid cooldowns on single deployment model groups
+        ):
             return True
 
         elif (
@@ -140,7 +216,7 @@
             return True
 
         return False
-    else:
+    elif not is_single_deployment_model_group:
         return should_cooldown_based_on_allowed_fails_policy(
             litellm_router_instance=litellm_router_instance,
             deployment=deployment,
diff --git a/tests/local_testing/test_router.py b/tests/local_testing/test_router.py
index d2c1c8fbec..965b47f572 100644
--- a/tests/local_testing/test_router.py
+++ b/tests/local_testing/test_router.py
@@ -2190,6 +2190,8 @@ def test_router_context_window_pre_call_check(model, base_model, llm_provider):
 
 
 def test_router_cooldown_api_connection_error():
+    from litellm.router_utils.cooldown_handlers import _is_cooldown_required
+
     try:
         _ = litellm.completion(
             model="vertex_ai/gemini-1.5-pro",
@@ -2197,8 +2199,11 @@ def test_router_cooldown_api_connection_error():
         )
     except litellm.APIConnectionError as e:
         assert (
-            Router()._is_cooldown_required(
-                model_id="", exception_status=e.code, exception_str=str(e)
+            _is_cooldown_required(
+                litellm_router_instance=Router(),
+                model_id="",
+                exception_status=e.code,
+                exception_str=str(e),
             )
             is False
         )
diff --git a/tests/router_unit_tests/test_router_cooldown_utils.py b/tests/router_unit_tests/test_router_cooldown_utils.py
index 1623808fc1..4ac703419a 100644
--- a/tests/router_unit_tests/test_router_cooldown_utils.py
+++ b/tests/router_unit_tests/test_router_cooldown_utils.py
@@ -25,6 +25,11 @@ from litellm.router_utils.router_callbacks.track_deployment_metrics import (
     increment_deployment_successes_for_current_minute,
 )
 
+import pytest
+from unittest.mock import patch
+from litellm import Router
+from litellm.router_utils.cooldown_handlers import _should_cooldown_deployment
+
 load_dotenv()
 
 
@@ -183,6 +188,11 @@ def testing_litellm_router():
                "litellm_params": {"model": "openai/test_deployment"},
                "model_id": "test_deployment_2",
            },
+            {
+                "model_name": "test_deployment",
+                "litellm_params": {"model": "openai/test_deployment-2"},
+                "model_id": "test_deployment_3",
+            },
         ]
     )
 
@@ -395,3 +405,114 @@ def test_cast_exception_status_to_int():
     assert cast_exception_status_to_int(200) == 200
     assert cast_exception_status_to_int("404") == 404
     assert cast_exception_status_to_int("invalid") == 500
+
+
+@pytest.fixture
+def router():
+    return Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {"model": "gpt-4"},
+                "model_info": {
+                    "id": "gpt-4--0",
+                },
+            }
+        ]
+    )
+
+
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_successes_for_current_minute"
+)
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_failures_for_current_minute"
+)
+def test_should_cooldown_high_traffic_all_fails(mock_failures, mock_successes, router):
+    # Simulate traffic above SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD, all failures, 0 successes
+    from litellm.constants import SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD
+
+    mock_failures.return_value = SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD + 1
+    mock_successes.return_value = 0
+
+    should_cooldown = _should_cooldown_deployment(
+        litellm_router_instance=router,
+        deployment="gpt-4--0",
+        exception_status=500,
+        original_exception=Exception("Test error"),
+    )
+
+    assert (
+        should_cooldown is True
+    ), "Should cooldown when all requests fail with sufficient traffic"
+
+
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_successes_for_current_minute"
+)
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_failures_for_current_minute"
+)
+def test_no_cooldown_low_traffic(mock_failures, mock_successes, router):
+    # Simulate 3 failures, 0 successes (below SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD)
+    mock_failures.return_value = 3
+    mock_successes.return_value = 0
+
+    should_cooldown = _should_cooldown_deployment(
+        litellm_router_instance=router,
+        deployment="gpt-4--0",
+        exception_status=500,
+        original_exception=Exception("Test error"),
+    )
+
+    assert (
+        should_cooldown is False
+    ), "Should not cooldown when traffic is below threshold"
+
+
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_successes_for_current_minute"
+)
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_failures_for_current_minute"
+)
+def test_cooldown_rate_limit(mock_failures, mock_successes, router):
+    """
+    Don't cooldown single-deployment model groups for anything besides sustained failing traffic (e.g. a lone 429 should not trigger a cooldown)
+    """
+    mock_failures.return_value = 1
+    mock_successes.return_value = 0
+
+    should_cooldown = _should_cooldown_deployment(
+        litellm_router_instance=router,
+        deployment="gpt-4--0",
+        exception_status=429,  # Rate limit error
+        original_exception=Exception("Rate limit exceeded"),
+    )
+
+    assert (
+        should_cooldown is False
+    ), "Should not cooldown on rate limit error for single deployment models"
+
+
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_successes_for_current_minute"
+)
+@patch(
+    "litellm.router_utils.cooldown_handlers.get_deployment_failures_for_current_minute"
+)
+def test_mixed_success_failure(mock_failures, mock_successes, router):
+    # Simulate 3 failures, 7 successes
+    mock_failures.return_value = 3
+    mock_successes.return_value = 7
+
+    should_cooldown = _should_cooldown_deployment(
+        litellm_router_instance=router,
+        deployment="gpt-4--0",
+        exception_status=500,
+        original_exception=Exception("Test error"),
+    )
+
+    assert (
+        should_cooldown is False
+    ), "Should not cooldown when failure rate is below threshold"
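
Not part of the patch above: a minimal sketch of the single-deployment cooldown rule this diff introduces, assuming the constants added in litellm/constants.py and the default allowed-fails settings. The helper name below is hypothetical and only illustrates the threshold math applied inside _should_cooldown_deployment; it is not an API added by the patch.

from litellm.constants import SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD


def single_deployment_should_cooldown(
    fails_this_minute: int, successes_this_minute: int
) -> bool:
    # Hypothetical helper (illustration only). With this patch, a model group that has
    # exactly one deployment is cooled down only when every request in the current minute
    # failed AND traffic reached SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD (1000/minute).
    # A 429 or a >50% failure rate alone no longer cools down a single-deployment group.
    total = fails_this_minute + successes_this_minute
    if total == 0:
        return False
    return (
        fails_this_minute == total
        and total >= SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD
    )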