Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 19:54:13 +00:00)

commit bb2fa73609 (parent 8bb2c6d188)
refactor: instrument body param to bubble up on exception

5 changed files with 60 additions and 21 deletions
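The change threads the provider's error payload (the `body` dict carried by the underlying OpenAI exception) through OpenAIError and BaseLLMException, so fields such as `code`, `param`, and `type` remain available on the exception a caller catches. A minimal caller-side sketch of the intended behavior, mirroring the test added at the bottom of this diff (the invalid "usera" role is deliberate):

    import litellm

    try:
        litellm.completion(
            model="gpt-4o-mini",
            messages=[{"role": "usera", "content": "hi"}],  # invalid role, rejected upstream
        )
    except Exception as e:
        # After this change the provider's error details should bubble up
        # instead of being dropped when the exception is re-raised.
        print(getattr(e, "code", None))   # e.g. "invalid_request_error"
        print(getattr(e, "param", None))  # e.g. "messages"
        print(getattr(e, "type", None))   # e.g. "invalid_request_error"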
@@ -51,6 +51,7 @@ class BaseLLMException(Exception):
         headers: Optional[Union[dict, httpx.Headers]] = None,
         request: Optional[httpx.Request] = None,
         response: Optional[httpx.Response] = None,
+        body: Optional[dict] = None,
     ):
         self.status_code = status_code
         self.message: str = message
@@ -67,6 +68,7 @@ class BaseLLMException(Exception):
             self.response = httpx.Response(
                 status_code=status_code, request=self.request
             )
+        self.body = body
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
@@ -19,6 +19,7 @@ class OpenAIError(BaseLLMException):
         request: Optional[httpx.Request] = None,
         response: Optional[httpx.Response] = None,
         headers: Optional[Union[dict, httpx.Headers]] = None,
+        body: Optional[dict] = None,
     ):
         self.status_code = status_code
         self.message = message
@@ -39,6 +40,7 @@ class OpenAIError(BaseLLMException):
             headers=self.headers,
             request=self.request,
             response=self.response,
+            body=body,
         )
@@ -828,13 +828,17 @@ class OpenAIChatCompletion(BaseLLM):
         except Exception as e:
             exception_response = getattr(e, "response", None)
             status_code = getattr(e, "status_code", 500)
+            exception_body = getattr(e, "body", None)
             error_headers = getattr(e, "headers", None)
             if error_headers is None and exception_response:
                 error_headers = getattr(exception_response, "headers", None)
             message = getattr(e, "message", str(e))

             raise OpenAIError(
-                status_code=status_code, message=message, headers=error_headers
+                status_code=status_code,
+                message=message,
+                headers=error_headers,
+                body=exception_body,
             )

     def streaming(
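Note on the new `exception_body = getattr(e, "body", None)` line: it presumes the caught exception carries the parsed error payload on a `body` attribute, as the openai-python SDK's APIError subclasses do. A rough sketch of that assumption (not part of this diff):

    import openai

    client = openai.OpenAI()
    try:
        client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "usera", "content": "hi"}],  # invalid role
        )
    except openai.APIError as e:
        # The SDK attaches the provider's error payload here; typically a dict
        # with "message", "type", "param", and "code" keys.
        print(e.body)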
@@ -6057,26 +6057,6 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
-    "jamba-large-1.6": {
-        "max_tokens": 256000,
-        "max_input_tokens": 256000,
-        "max_output_tokens": 256000,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000008,
-        "litellm_provider": "ai21",
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
-    "jamba-mini-1.6": {
-        "max_tokens": 256000,
-        "max_input_tokens": 256000,
-        "max_output_tokens": 256000,
-        "input_cost_per_token": 0.0000002,
-        "output_cost_per_token": 0.0000004,
-        "litellm_provider": "ai21",
-        "mode": "chat",
-        "supports_tool_choice": true
-    },
     "jamba-1.5-mini": {
         "max_tokens": 256000,
         "max_input_tokens": 256000,
@@ -6097,6 +6077,26 @@
         "mode": "chat",
         "supports_tool_choice": true
     },
+    "jamba-large-1.6": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.000002,
+        "output_cost_per_token": 0.000008,
+        "litellm_provider": "ai21",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
+    "jamba-mini-1.6": {
+        "max_tokens": 256000,
+        "max_input_tokens": 256000,
+        "max_output_tokens": 256000,
+        "input_cost_per_token": 0.0000002,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "ai21",
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
     "j2-mid": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
@@ -391,3 +391,34 @@ def test_openai_chat_completion_streaming_handler_reasoning_content():
     )

     assert response.choices[0].delta.reasoning_content == "."
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.parametrize("stream_mode", [True, False])
+@pytest.mark.asyncio
+async def test_exception_bubbling_up(sync_mode, stream_mode):
+    """
+    make sure code, param, and type are bubbled up
+    """
+    import litellm
+
+    litellm.set_verbose = True
+    with pytest.raises(Exception) as exc_info:
+        if sync_mode:
+            litellm.completion(
+                model="gpt-4o-mini",
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+        else:
+            await litellm.acompletion(
+                model="gpt-4o-mini",
+                messages=[{"role": "usera", "content": "hi"}],
+                stream=stream_mode,
+                sync_stream=sync_mode,
+            )
+
+    assert exc_info.value.code == "invalid_request_error"
+    assert exc_info.value.param == "messages"
+    assert exc_info.value.type == "invalid_request_error"
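The assertions on `code`, `param`, and `type` only pass if the error payload survives the full raise chain: the openai SDK exception's `body` is captured in OpenAIChatCompletion, forwarded through OpenAIError into BaseLLMException, and presumably read by litellm's exception-mapping layer (not shown in this diff) to populate those fields on the exception the caller ultimately catches.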