diff --git a/litellm/__init__.py b/litellm/__init__.py
index 35f931788..437728ce0 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -802,6 +802,7 @@ from .exceptions import (
     APIConnectionError,
     APIResponseValidationError,
     UnprocessableEntityError,
+    LITELLM_EXCEPTION_TYPES,
 )
 from .budget_manager import BudgetManager
 from .proxy.proxy_cli import run_server
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index a27562188..ea48d5838 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -582,6 +582,25 @@ class OpenAIError(openai.OpenAIError): # type: ignore
         self.llm_provider = "openai"
 
 
+LITELLM_EXCEPTION_TYPES = [
+    AuthenticationError,
+    NotFoundError,
+    BadRequestError,
+    UnprocessableEntityError,
+    Timeout,
+    PermissionDeniedError,
+    RateLimitError,
+    ContextWindowExceededError,
+    RejectedRequestError,
+    ContentPolicyViolationError,
+    ServiceUnavailableError,
+    APIError,
+    APIConnectionError,
+    APIResponseValidationError,
+    OpenAIError,
+]
+
+
 class BudgetExceededError(Exception):
     def __init__(self, current_cost, max_budget):
         self.current_cost = current_cost
diff --git a/litellm/router.py b/litellm/router.py
index 88eb54a04..09b24e1d0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2041,11 +2041,10 @@ class Router:
                 )
                 await asyncio.sleep(_timeout)
 
-        try:
-            cooldown_deployments = await self._async_get_cooldown_deployments()
-            original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
-        except:
-            pass
+        if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
+            original_exception.max_retries = num_retries
+            original_exception.num_retries = current_attempt
+
         raise original_exception
 
     def should_retry_this_error(