Fix deepseek 'reasoning_content' error (#8963)

* fix(streaming_handler.py): fix deepseek reasoning content streaming

Fixes https://github.com/BerriAI/litellm/issues/8939

* test(test_streaming_handler.py): add unit test for the streaming handler's 'is_chunk_non_empty' function

ensures 'reasoning_content' is handled correctly
This commit is contained in:
Krish Dholakia 2025-03-03 14:34:10 -08:00 committed by GitHub
parent b9bddac776
commit 94d28d59e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 106 additions and 8 deletions

View file

@ -755,16 +755,12 @@ class CustomStreamWrapper:
setattr(model_response, k, v)
return model_response
def return_processed_chunk_logic( # noqa
def is_chunk_non_empty(
self,
completion_obj: Dict[str, Any],
model_response: ModelResponseStream,
response_obj: Dict[str, Any],
):
print_verbose(
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
)
) -> bool:
if (
"content" in completion_obj
and (
@ -780,6 +776,10 @@ class CustomStreamWrapper:
"function_call" in completion_obj
and completion_obj["function_call"] is not None
)
or (
"reasoning_content" in model_response.choices[0].delta
and model_response.choices[0].delta.reasoning_content is not None
)
or (model_response.choices[0].delta.provider_specific_fields is not None)
or (
"provider_specific_fields" in model_response
@ -789,8 +789,27 @@ class CustomStreamWrapper:
"provider_specific_fields" in response_obj
and response_obj["provider_specific_fields"] is not None
)
): # cannot set content of an OpenAI Object to be an empty string
):
return True
else:
return False
def return_processed_chunk_logic( # noqa
self,
completion_obj: Dict[str, Any],
model_response: ModelResponseStream,
response_obj: Dict[str, Any],
):
print_verbose(
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
)
is_chunk_non_empty = self.is_chunk_non_empty(
completion_obj, model_response, response_obj
)
if (
is_chunk_non_empty
): # cannot set content of an OpenAI Object to be an empty string
self.safety_checker()
hold, model_response_str = self.check_special_tokens(
chunk=completion_obj["content"],
@ -806,7 +825,7 @@ class CustomStreamWrapper:
for choice in original_chunk.choices:
try:
if isinstance(choice, BaseModel):
choice_json = choice.model_dump()
choice_json = choice.model_dump() # type: ignore
choice_json.pop(
"finish_reason", None
) # for mistral etc. which return a value in their last chunk (not-openai compatible).

View file

@ -0,0 +1,48 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.types.utils import ModelResponseStream
@pytest.fixture
def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
    """Build a bare CustomStreamWrapper: no stream, no model, mocked logging."""
    wrapper = CustomStreamWrapper(
        completion_stream=None,
        model=None,
        logging_obj=MagicMock(),
        custom_llm_provider=None,
    )
    return wrapper
def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapper):
    """A chunk whose delta carries only 'reasoning_content' must be treated as non-empty."""
    deepseek_chunk = {
        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
        "object": "chat.completion.chunk",
        "created": 1741037890,
        "model": "deepseek-reasoner",
        "system_fingerprint": "fp_5417b77867_prod0225",
        "choices": [
            {
                "index": 0,
                "delta": {"content": None, "reasoning_content": "."},
                "logprobs": None,
                "finish_reason": None,
            }
        ],
    }

    result = initialized_custom_stream_wrapper.is_chunk_non_empty(
        completion_obj=MagicMock(),
        model_response=ModelResponseStream(**deepseek_chunk),
        response_obj=MagicMock(),
    )
    assert result

View file

@ -360,3 +360,34 @@ def test_o1_parallel_tool_calls(model):
parallel_tool_calls=True,
drop_params=True,
)
def test_openai_chat_completion_streaming_handler_reasoning_content():
    """chunk_parser must surface deepseek's 'reasoning_content' on the parsed delta."""
    from unittest.mock import MagicMock

    from litellm.llms.openai.chat.gpt_transformation import (
        OpenAIChatCompletionStreamingHandler,
    )

    raw_chunk = {
        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
        "object": "chat.completion.chunk",
        "created": 1741037890,
        "model": "deepseek-reasoner",
        "system_fingerprint": "fp_5417b77867_prod0225",
        "choices": [
            {
                "index": 0,
                "delta": {"content": None, "reasoning_content": "."},
                "logprobs": None,
                "finish_reason": None,
            }
        ],
    }

    handler = OpenAIChatCompletionStreamingHandler(
        streaming_response=MagicMock(),
        sync_stream=True,
    )
    parsed = handler.chunk_parser(chunk=raw_chunk)

    assert parsed.choices[0].delta.reasoning_content == "."