diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py
index d05b79dd7f..8327a10464 100644
--- a/litellm/llms/base_llm/chat/transformation.py
+++ b/litellm/llms/base_llm/chat/transformation.py
@@ -51,6 +51,7 @@ class BaseLLMException(Exception):
         headers: Optional[Union[dict, httpx.Headers]] = None,
         request: Optional[httpx.Request] = None,
         response: Optional[httpx.Response] = None,
+        body: Optional[dict] = None,
     ):
         self.status_code = status_code
         self.message: str = message
@@ -67,6 +68,7 @@ class BaseLLMException(Exception):
             self.response = httpx.Response(
                 status_code=status_code, request=self.request
             )
+        self.body = body
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
diff --git a/litellm/llms/openai/common_utils.py b/litellm/llms/openai/common_utils.py
index 98a55b4bd3..a8412f867b 100644
--- a/litellm/llms/openai/common_utils.py
+++ b/litellm/llms/openai/common_utils.py
@@ -19,6 +19,7 @@ class OpenAIError(BaseLLMException):
         request: Optional[httpx.Request] = None,
         response: Optional[httpx.Response] = None,
         headers: Optional[Union[dict, httpx.Headers]] = None,
+        body: Optional[dict] = None,
     ):
         self.status_code = status_code
         self.message = message
@@ -39,6 +40,7 @@ class OpenAIError(BaseLLMException):
             headers=self.headers,
             request=self.request,
             response=self.response,
+            body=body,
         )


diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index 3fddca53e7..ca2b8ec6c7 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -828,13 +828,17 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             exception_response = getattr(e, "response", None)
             status_code = getattr(e, "status_code", 500)
+            exception_body = getattr(e, "body", None)
             error_headers = getattr(e, "headers", None)
             if error_headers is None and exception_response:
                 error_headers = getattr(exception_response, "headers", None)
             message = getattr(e, "message", str(e))

             raise OpenAIError(
-                status_code=status_code, message=message, headers=error_headers
+                status_code=status_code,
+                message=message,
+                headers=error_headers,
+                body=exception_body,
             )

     def streaming(
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index b61ddd4562..b2a08544f9 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -6057,26 +6057,6 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
-    "jamba-large-1.6": {
-        "max_tokens": 256000,
-        "max_input_tokens": 256000,
-        "max_output_tokens": 256000,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000008,
-        "litellm_provider": "ai21",
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
-    "jamba-mini-1.6": {
-        "max_tokens": 256000,
-        "max_input_tokens": 256000,
-        "max_output_tokens": 256000,
-        "input_cost_per_token": 0.0000002,
-        "output_cost_per_token": 0.0000004,
-        "litellm_provider": "ai21",
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
     "jamba-1.5-mini": {
         "max_tokens": 256000,
         "max_input_tokens": 256000,
@@ -6097,6 +6077,26 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
+    "jamba-large-1.6": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000008,
+        "litellm_provider": "ai21",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
+    "jamba-mini-1.6": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "ai21",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
     "j2-mid": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py
index 172c946636..7bbcea2a23 100644
--- a/tests/llm_translation/test_openai.py
+++ b/tests/llm_translation/test_openai.py
@@ -391,3 +391,34 @@ def test_openai_chat_completion_streaming_handler_reasoning_content():
     )

     assert response.choices[0].delta.reasoning_content == "."
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("stream_mode", [True, False])
+@pytest.mark.asyncio
+async def test_exception_bubbling_up(sync_mode, stream_mode):
+    """
+    make sure code, param, and type are bubbled up
+    """
+    import litellm
+
+    litellm.set_verbose = True
+    with pytest.raises(Exception) as exc_info:
+        if sync_mode:
+            litellm.completion(
+                model="gpt-4o-mini",
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+        else:
+            await litellm.acompletion(
+                model="gpt-4o-mini",
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+
+    assert exc_info.value.code == "invalid_request_error"
+    assert exc_info.value.param == "messages"
+    assert exc_info.value.type == "invalid_request_error"