feat: add num_retries and max_retries attributes to raised exceptions

2025-04-25 18:54:30 +00:00 · 2024-06-01 16:53:00 -07:00 · 2024-06-01 16:53:00 -07:00 · 286d42a881
commit 286d42a881
parent 40622f5f27
3 changed files with 24 additions and 5 deletions
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -802,6 +802,7 @@ from .exceptions import (
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
    LITELLM_EXCEPTION_TYPES,
 )
 from .budget_manager import BudgetManager
 from .proxy.proxy_cli import run_server
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -582,6 +582,25 @@ class OpenAIError(openai.OpenAIError):  # type: ignore
        self.llm_provider = "openai"
 LITELLM_EXCEPTION_TYPES = [
    AuthenticationError,
    NotFoundError,
    BadRequestError,
    UnprocessableEntityError,
    Timeout,
    PermissionDeniedError,
    RateLimitError,
    ContextWindowExceededError,
    RejectedRequestError,
    ContentPolicyViolationError,
    ServiceUnavailableError,
    APIError,
    APIConnectionError,
    APIResponseValidationError,
    OpenAIError,
 ]
 class BudgetExceededError(Exception):
    def __init__(self, current_cost, max_budget):
        self.current_cost = current_cost
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2041,11 +2041,10 @@ class Router:
                    )
                    await asyncio.sleep(_timeout)
-            try:
+            if type(original_exception) in litellm.LITELLM_EXCEPTION_TYPES:
-                cooldown_deployments = await self._async_get_cooldown_deployments()
+                original_exception.max_retries = num_retries
-                original_exception.message += f"\nNumber Retries = {current_attempt + 1}, Max Retries={num_retries}\nCooldown Deployments={cooldown_deployments}"
+                original_exception.num_retries = current_attempt
-            except:
+
                pass
            raise original_exception
    def should_retry_this_error(