diff --git a/litellm/llms/custom_httpx/http_handler.py b/litellm/llms/custom_httpx/http_handler.py index 0186907a6..d9a5f0a44 100644 --- a/litellm/llms/custom_httpx/http_handler.py +++ b/litellm/llms/custom_httpx/http_handler.py @@ -78,6 +78,7 @@ class MaskedHTTPStatusError(httpx.HTTPStatusError): response=httpx.Response( status_code=original_error.response.status_code, content=original_error.response.content, + headers=original_error.response.headers, ), ) self.message = message diff --git a/tests/local_testing/test_exceptions.py b/tests/local_testing/test_exceptions.py index 67c36928f..18f732378 100644 --- a/tests/local_testing/test_exceptions.py +++ b/tests/local_testing/test_exceptions.py @@ -1146,7 +1146,9 @@ async def test_exception_with_headers_httpx( except litellm.RateLimitError as e: exception_raised = True - assert e.litellm_response_headers is not None + assert ( + e.litellm_response_headers is not None + ), "litellm_response_headers is None" print("e.litellm_response_headers", e.litellm_response_headers) assert int(e.litellm_response_headers["retry-after"]) == cooldown_time