diff --git a/litellm/__init__.py b/litellm/__init__.py
index 35f931788..437728ce0 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -802,6 +802,7 @@ from .exceptions import (
     APIConnectionError,
     APIResponseValidationError,
     UnprocessableEntityError,
+    LITELLM_EXCEPTION_TYPES,
 )
 from .budget_manager import BudgetManager
 from .proxy.proxy_cli import run_server
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index a27562188..ea48d5838 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -582,6 +582,25 @@ class OpenAIError(openai.OpenAIError): # type: ignore
         self.llm_provider = "openai"
 
 
+LITELLM_EXCEPTION_TYPES = [
+    AuthenticationError,
+    NotFoundError,
+    BadRequestError,
+    UnprocessableEntityError,
+    Timeout,
+    PermissionDeniedError,
+    RateLimitError,
+    ContextWindowExceededError,
+    RejectedRequestError,
+    ContentPolicyViolationError,
+    ServiceUnavailableError,
+    APIError,
+    APIConnectionError,
+    APIResponseValidationError,
+    OpenAIError,
+]
+
+
 class BudgetExceededError(Exception):
     def __init__(self, current_cost, max_budget):
         self.current_cost = current_cost
diff --git a/litellm/router.py b/litellm/router.py
index 88eb54a04..09b24e1d0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2041,11 +2041,10 @@ class Router:
                 )
                 await asyncio.sleep(_timeout)
 
-        try:
-            cooldown_deployments = await self._async_get_cooldown_deployments()
-            original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
-        except:
-            pass
+        if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
+            original_exception.max_retries = num_retries
+            original_exception.num_retries = current_attempt
+
         raise original_exception
 
     def should_retry_this_error(