forked from phoenix/litellm-mirror
Merge pull request #3655 from BerriAI/litellm_only_show_debug_info_server_side
Fix Proxy Server - only show API base and model in server-side log exceptions, not on the client side
This commit is contained in:
commit c00d232900
4 changed files with 199 additions and 54 deletions
@@ -15,11 +15,19 @@ from typing import Optional
 class AuthenticationError(openai.AuthenticationError): # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs
@@ -27,11 +35,19 @@ class AuthenticationError(openai.AuthenticationError): # type: ignore
 # raise when invalid models passed, example gpt-8
 class NotFoundError(openai.NotFoundError): # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 404
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs
@@ -39,12 +55,18 @@ class NotFoundError(openai.NotFoundError): # type: ignore
 class BadRequestError(openai.BadRequestError): # type: ignore
     def __init__(
-        self, message, model, llm_provider, response: Optional[httpx.Response] = None
+        self,
+        message,
+        model,
+        llm_provider,
+        response: Optional[httpx.Response] = None,
+        litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         response = response or httpx.Response(
             status_code=self.status_code,
             request=httpx.Request(
@@ -57,18 +79,28 @@ class BadRequestError(openai.BadRequestError): # type: ignore


 class UnprocessableEntityError(openai.UnprocessableEntityError): # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 422
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs


 class Timeout(openai.APITimeoutError): # type: ignore
-    def __init__(self, message, model, llm_provider):
+    def __init__(
+        self, message, model, llm_provider, litellm_debug_info: Optional[str] = None
+    ):
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         super().__init__(
             request=request
@@ -77,6 +109,7 @@ class Timeout(openai.APITimeoutError): # type: ignore
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info

     # custom function to convert to str
     def __str__(self):
@@ -84,22 +117,38 @@ class Timeout(openai.APITimeoutError): # type: ignore


 class PermissionDeniedError(openai.PermissionDeniedError): # type:ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 403
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs


 class RateLimitError(openai.RateLimitError): # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
         self.modle = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs
@@ -107,11 +156,19 @@ class RateLimitError(openai.RateLimitError): # type: ignore
 # sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors
 class ContextWindowExceededError(BadRequestError): # type: ignore
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             message=self.message,
             model=self.model, # type: ignore
@@ -122,11 +179,19 @@ class ContextWindowExceededError(BadRequestError): # type: ignore
 class ContentPolicyViolationError(BadRequestError): # type: ignore
     # Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
-    def __init__(self, message, model, llm_provider, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        model,
+        llm_provider,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 400
         self.message = message
         self.model = model
         self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             message=self.message,
             model=self.model, # type: ignore
@@ -136,11 +201,19 @@ class ContentPolicyViolationError(BadRequestError): # type: ignore


 class ServiceUnavailableError(openai.APIStatusError): # type: ignore
-    def __init__(self, message, llm_provider, model, response: httpx.Response):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        response: httpx.Response,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.status_code = 503
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(
             self.message, response=response, body=None
         ) # Call the base class constructor with the parameters it needs
@@ -149,33 +222,51 @@ class ServiceUnavailableError(openai.APIStatusError): # type: ignore
 # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
 class APIError(openai.APIError): # type: ignore
     def __init__(
-        self, status_code, message, llm_provider, model, request: httpx.Request
+        self,
+        status_code,
+        message,
+        llm_provider,
+        model,
+        request: httpx.Request,
+        litellm_debug_info: Optional[str] = None,
     ):
         self.status_code = status_code
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(self.message, request=request, body=None) # type: ignore


 # raised if an invalid request (not get, delete, put, post) is made
 class APIConnectionError(openai.APIConnectionError): # type: ignore
-    def __init__(self, message, llm_provider, model, request: httpx.Request):
+    def __init__(
+        self,
+        message,
+        llm_provider,
+        model,
+        request: httpx.Request,
+        litellm_debug_info: Optional[str] = None,
+    ):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         self.status_code = 500
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(message=self.message, request=request)


 # raised if an invalid request (not get, delete, put, post) is made
 class APIResponseValidationError(openai.APIResponseValidationError): # type: ignore
-    def __init__(self, message, llm_provider, model):
+    def __init__(
+        self, message, llm_provider, model, litellm_debug_info: Optional[str] = None
+    ):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
         response = httpx.Response(status_code=500, request=request)
+        self.litellm_debug_info = litellm_debug_info
         super().__init__(response=response, body=None, message=message)
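The exception-class changes above all follow one pattern: each constructor gains an optional litellm_debug_info argument and stores it on the instance, separate from the client-facing message. Below is a minimal sketch of constructing and reading that field; it assumes these classes are importable from litellm.exceptions, and the provider, model, and debug strings are made-up illustration values, not taken from the diff.

    import httpx
    from litellm.exceptions import AuthenticationError  # assumed import path

    exc = AuthenticationError(
        message="AzureException - invalid api key",  # what the client sees
        llm_provider="azure",                        # hypothetical values below
        model="azure/gpt-35-turbo",
        response=httpx.Response(
            status_code=401,
            request=httpx.Request("POST", "https://example.invalid"),
        ),
        litellm_debug_info="api_base: https://my-deployment.example.invalid",
    )
    print(exc.message)             # no API base / deployment details
    print(exc.litellm_debug_info)  # detail intended for server-side logs only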
@@ -3857,8 +3857,11 @@ async def chat_completion(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
         verbose_proxy_logger.debug(
-            f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
         if user_debug:
@@ -4049,9 +4052,11 @@ async def completion(
     except Exception as e:
         data["litellm_status"] = "fail" # used for alerting
         verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
         verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
             e,
+            litellm_debug_info,
         )
         traceback.print_exc()
         error_traceback = traceback.format_exc()
@@ -4254,6 +4259,12 @@ async def embeddings(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
+        litellm_debug_info = getattr(e, "litellm_debug_info", "")
+        verbose_proxy_logger.debug(
+            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
+            e,
+            litellm_debug_info,
+        )
         traceback.print_exc()
         if isinstance(e, HTTPException):
             raise ProxyException(
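All three proxy handlers above use the same logging pattern: read litellm_debug_info off the exception with getattr and an empty-string default, then pass the exception and the debug string as lazy %-style arguments to verbose_proxy_logger.debug, so the detail only shows up in server-side debug logs and is never added to the client response. A rough stand-alone sketch of that pattern, with a stdlib logger standing in for the proxy's verbose_proxy_logger and a made-up exception:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("proxy-sketch")  # stand-in for verbose_proxy_logger

    def log_failure(e: Exception) -> None:
        # Default to "" so exceptions without the attribute still log cleanly.
        litellm_debug_info = getattr(e, "litellm_debug_info", "")
        # Lazy %-style args: only interpolated when DEBUG logging is enabled.
        logger.debug("An error occurred: %s %s", e, litellm_debug_info)

    err = ValueError("upstream call failed")          # hypothetical failure
    err.litellm_debug_info = "model: gpt-3.5-turbo"   # attribute added by this change
    log_failure(err)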
@@ -418,9 +418,14 @@ class ProxyLogging:

         Related issue - https://github.com/BerriAI/litellm/issues/3395
         """
+        litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
+        exception_str = str(original_exception)
+        if litellm_debug_info is not None:
+            exception_str += litellm_debug_info
+
         asyncio.create_task(
             self.alerting_handler(
-                message=f"LLM API call failed: {str(original_exception)}",
+                message=f"LLM API call failed: {exception_str}",
                 level="High",
                 alert_type="llm_exceptions",
                 request_data=request_data,
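For alerting, the hook above appends the debug info to the alert text only when the exception actually carries it. A condensed sketch of that logic, with a hypothetical exception standing in for original_exception:

    original_exception = Exception("AuthenticationError - invalid api key")  # stand-in
    original_exception.litellm_debug_info = " api_base: https://example.invalid"

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        exception_str += litellm_debug_info

    # The alert sent to the alerting handler includes the server-side detail.
    print(f"LLM API call failed: {exception_str}")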
litellm/utils.py (114 changed lines)
@@ -8194,9 +8194,10 @@ def exception_type(
             if "Request Timeout Error" in error_str or "Request timed out" in error_str:
                 exception_mapping_worked = True
                 raise Timeout(
-                    message=f"APITimeoutError - Request timed out. {extra_information} \n error_str: {error_str}",
+                    message=f"APITimeoutError - Request timed out. \nerror_str: {error_str}",
                     model=model,
                     llm_provider=custom_llm_provider,
+                    litellm_debug_info=extra_information,
                 )

             if (
@@ -8226,10 +8227,11 @@ def exception_type(
             if "This model's maximum context length is" in error_str:
                 exception_mapping_worked = True
                 raise ContextWindowExceededError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif (
                 "invalid_request_error" in error_str
@@ -8237,10 +8239,11 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise NotFoundError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif (
                 "invalid_request_error" in error_str
@@ -8248,10 +8251,11 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise ContentPolicyViolationError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif (
                 "invalid_request_error" in error_str
@@ -8259,17 +8263,19 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise BadRequestError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif "Request too large" in error_str:
                 raise RateLimitError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     model=model,
                     llm_provider=custom_llm_provider,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif (
                 "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
@@ -8277,10 +8283,11 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise AuthenticationError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif "Mistral API raised a streaming error" in error_str:
                 exception_mapping_worked = True
@@ -8289,82 +8296,92 @@ def exception_type(
                 )
                 raise APIError(
                     status_code=500,
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
                     request=_request,
+                    litellm_debug_info=extra_information,
                 )
             elif hasattr(original_exception, "status_code"):
                 exception_mapping_worked = True
                 if original_exception.status_code == 401:
                     exception_mapping_worked = True
                     raise AuthenticationError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         llm_provider=custom_llm_provider,
                         model=model,
                         response=original_exception.response,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 404:
                     exception_mapping_worked = True
                     raise NotFoundError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
                         response=original_exception.response,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 408:
                     exception_mapping_worked = True
                     raise Timeout(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 422:
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
                         response=original_exception.response,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 429:
                     exception_mapping_worked = True
                     raise RateLimitError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
                         response=original_exception.response,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 503:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
                         response=original_exception.response,
+                        litellm_debug_info=extra_information,
                     )
                 elif original_exception.status_code == 504: # gateway timeout error
                     exception_mapping_worked = True
                     raise Timeout(
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         model=model,
                         llm_provider=custom_llm_provider,
+                        litellm_debug_info=extra_information,
                     )
                 else:
                     exception_mapping_worked = True
                     raise APIError(
                         status_code=original_exception.status_code,
-                        message=f"{exception_provider} - {message} {extra_information}",
+                        message=f"{exception_provider} - {message}",
                         llm_provider=custom_llm_provider,
                         model=model,
                         request=original_exception.request,
+                        litellm_debug_info=extra_information,
                     )
             else:
                 # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                 raise APIConnectionError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider=custom_llm_provider,
                     model=model,
+                    litellm_debug_info=extra_information,
                     request=httpx.Request(
                         method="POST", url="https://api.openai.com/v1/"
                     ),
@@ -8666,10 +8683,11 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise BadRequestError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif (
                 "None Unknown Error." in error_str
@@ -8677,26 +8695,29 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise APIError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                     status_code=500,
                     model=model,
                     llm_provider="vertex_ai",
                     request=original_exception.request,
+                    litellm_debug_info=extra_information,
                 )
             elif "403" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     response=original_exception.response,
+                    litellm_debug_info=extra_information,
                 )
             elif "The response was blocked." in error_str:
                 exception_mapping_worked = True
                 raise UnprocessableEntityError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                     response=httpx.Response(
                         status_code=429,
                         request=httpx.Request(
@@ -8713,9 +8734,10 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise RateLimitError(
-                    message=f"VertexAIException - {error_str} {extra_information}",
+                    message=f"VertexAIException - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
+                    litellm_debug_info=extra_information,
                     response=httpx.Response(
                         status_code=429,
                         request=httpx.Request(
@@ -8728,18 +8750,20 @@ def exception_type(
                 if original_exception.status_code == 400:
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"VertexAIException - {error_str} {extra_information}",
+                        message=f"VertexAIException - {error_str}",
                         model=model,
                         llm_provider="vertex_ai",
+                        litellm_debug_info=extra_information,
                         response=original_exception.response,
                     )
                 if original_exception.status_code == 500:
                     exception_mapping_worked = True
                     raise APIError(
-                        message=f"VertexAIException - {error_str} {extra_information}",
+                        message=f"VertexAIException - {error_str}",
                         status_code=500,
                         model=model,
                         llm_provider="vertex_ai",
+                        litellm_debug_info=extra_information,
                         request=original_exception.request,
                     )
         elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
@@ -9340,25 +9364,28 @@ def exception_type(
                 exception_mapping_worked = True
                 raise APIError(
                     status_code=500,
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     request=httpx.Request(method="POST", url="https://openai.com/"),
                 )
             elif "This model's maximum context length is" in error_str:
                 exception_mapping_worked = True
                 raise ContextWindowExceededError(
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     response=original_exception.response,
                 )
             elif "DeploymentNotFound" in error_str:
                 exception_mapping_worked = True
                 raise NotFoundError(
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     response=original_exception.response,
                 )
             elif (
@@ -9370,17 +9397,19 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise ContentPolicyViolationError(
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     response=original_exception.response,
                 )
             elif "invalid_request_error" in error_str:
                 exception_mapping_worked = True
                 raise BadRequestError(
-                    message=f"AzureException - {original_exception.message} {extra_information}",
+                    message=f"AzureException - {original_exception.message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     response=original_exception.response,
                 )
             elif (
@@ -9389,9 +9418,10 @@ def exception_type(
             ):
                 exception_mapping_worked = True
                 raise AuthenticationError(
-                    message=f"{exception_provider} - {original_exception.message} {extra_information}",
+                    message=f"{exception_provider} - {original_exception.message}",
                     llm_provider=custom_llm_provider,
                     model=model,
+                    litellm_debug_info=extra_information,
                     response=original_exception.response,
                 )
             elif hasattr(original_exception, "status_code"):
@@ -9399,55 +9429,62 @@ def exception_type(
                 if original_exception.status_code == 401:
                     exception_mapping_worked = True
                     raise AuthenticationError(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         llm_provider="azure",
                         model=model,
+                        litellm_debug_info=extra_information,
                         response=original_exception.response,
                     )
                 elif original_exception.status_code == 408:
                     exception_mapping_worked = True
                     raise Timeout(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         model=model,
+                        litellm_debug_info=extra_information,
                         llm_provider="azure",
                     )
                 if original_exception.status_code == 422:
                     exception_mapping_worked = True
                     raise BadRequestError(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         model=model,
                         llm_provider="azure",
+                        litellm_debug_info=extra_information,
                         response=original_exception.response,
                     )
                 elif original_exception.status_code == 429:
                     exception_mapping_worked = True
                     raise RateLimitError(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         model=model,
                         llm_provider="azure",
+                        litellm_debug_info=extra_information,
                         response=original_exception.response,
                     )
                 elif original_exception.status_code == 503:
                     exception_mapping_worked = True
                     raise ServiceUnavailableError(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         model=model,
                         llm_provider="azure",
+                        litellm_debug_info=extra_information,
                         response=original_exception.response,
                     )
                 elif original_exception.status_code == 504: # gateway timeout error
                     exception_mapping_worked = True
                     raise Timeout(
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         model=model,
+                        litellm_debug_info=extra_information,
                         llm_provider="azure",
                     )
                 else:
                     exception_mapping_worked = True
                     raise APIError(
                         status_code=original_exception.status_code,
-                        message=f"AzureException - {original_exception.message} {extra_information}",
+                        message=f"AzureException - {original_exception.message}",
                         llm_provider="azure",
+                        litellm_debug_info=extra_information,
                         model=model,
                         request=httpx.Request(
                             method="POST", url="https://openai.com/"
@@ -9456,9 +9493,10 @@ def exception_type(
             else:
                 # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                 raise APIConnectionError(
-                    message=f"{exception_provider} - {message} {extra_information}",
+                    message=f"{exception_provider} - {message}",
                     llm_provider="azure",
                     model=model,
+                    litellm_debug_info=extra_information,
                     request=httpx.Request(method="POST", url="https://openai.com/"),
                 )
         if (
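Every exception_type branch above makes the same swap: extra_information (typically the API base and model details) is removed from the user-visible message and passed as litellm_debug_info instead. A condensed, self-contained sketch of one such branch; it assumes the patched RateLimitError is importable from litellm.exceptions, and the model, provider, and debug strings are made-up values:

    import httpx
    from litellm.exceptions import RateLimitError  # assumed import path

    def map_rate_limit(message: str, extra_information: str) -> None:
        # Mirrors the "Request too large" branch above, with the wiring condensed.
        raise RateLimitError(
            # Client-facing message: no longer includes extra_information.
            message=f"openai - {message}",
            model="gpt-3.5-turbo",        # hypothetical
            llm_provider="openai",
            response=httpx.Response(
                status_code=429,
                request=httpx.Request("POST", "https://example.invalid"),
            ),
            # The server-only detail travels on the exception instead.
            litellm_debug_info=extra_information,
        )

    try:
        map_rate_limit("Request too large", "api_base: https://example.invalid")
    except RateLimitError as e:
        assert "example.invalid" not in e.message
        print(e.litellm_debug_info)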