Fix deepseek 'reasoning_content' error (#8963)

* fix(streaming_handler.py): fix deepseek reasoning content streaming

Fixes https://github.com/BerriAI/litellm/issues/8939

* test(test_streaming_handler.py): add unit test for the streaming handler's 'is_chunk_non_empty' function

ensures 'reasoning_content' is handled correctly
This commit is contained in:
Krish Dholakia 2025-03-03 14:34:10 -08:00 committed by GitHub
parent b9bddac776
commit 94d28d59e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 106 additions and 8 deletions

View file

@ -755,16 +755,12 @@ class CustomStreamWrapper:
setattr(model_response, k, v)
return model_response
def return_processed_chunk_logic( # noqa
def is_chunk_non_empty(
self,
completion_obj: Dict[str, Any],
model_response: ModelResponseStream,
response_obj: Dict[str, Any],
):
print_verbose(
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
)
) -> bool:
if (
"content" in completion_obj
and (
@ -780,6 +776,10 @@ class CustomStreamWrapper:
"function_call" in completion_obj
and completion_obj["function_call"] is not None
)
or (
"reasoning_content" in model_response.choices[0].delta
and model_response.choices[0].delta.reasoning_content is not None
)
or (model_response.choices[0].delta.provider_specific_fields is not None)
or (
"provider_specific_fields" in model_response
@ -789,8 +789,27 @@ class CustomStreamWrapper:
"provider_specific_fields" in response_obj
and response_obj["provider_specific_fields"] is not None
)
): # cannot set content of an OpenAI Object to be an empty string
):
return True
else:
return False
def return_processed_chunk_logic( # noqa
self,
completion_obj: Dict[str, Any],
model_response: ModelResponseStream,
response_obj: Dict[str, Any],
):
print_verbose(
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
)
is_chunk_non_empty = self.is_chunk_non_empty(
completion_obj, model_response, response_obj
)
if (
is_chunk_non_empty
): # cannot set content of an OpenAI Object to be an empty string
self.safety_checker()
hold, model_response_str = self.check_special_tokens(
chunk=completion_obj["content"],
@ -806,7 +825,7 @@ class CustomStreamWrapper:
for choice in original_chunk.choices:
try:
if isinstance(choice, BaseModel):
choice_json = choice.model_dump()
choice_json = choice.model_dump() # type: ignore
choice_json.pop(
"finish_reason", None
) # for mistral etc. which return a value in their last chunk (not-openai compatible).

View file

@ -0,0 +1,48 @@
import json
import os
import sys
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
from litellm.types.utils import ModelResponseStream
@pytest.fixture
def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
    """Build a bare CustomStreamWrapper: no stream, no model, mocked logging."""
    wrapper = CustomStreamWrapper(
        completion_stream=None,
        model=None,
        logging_obj=MagicMock(),
        custom_llm_provider=None,
    )
    return wrapper
def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapper):
    """A chunk whose delta carries only 'reasoning_content' must be treated as non-empty."""
    deepseek_chunk = {
        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
        "object": "chat.completion.chunk",
        "created": 1741037890,
        "model": "deepseek-reasoner",
        "system_fingerprint": "fp_5417b77867_prod0225",
        "choices": [
            {
                "index": 0,
                "delta": {"content": None, "reasoning_content": "."},
                "logprobs": None,
                "finish_reason": None,
            }
        ],
    }

    result = initialized_custom_stream_wrapper.is_chunk_non_empty(
        completion_obj=MagicMock(),
        model_response=ModelResponseStream(**deepseek_chunk),
        response_obj=MagicMock(),
    )
    assert result

View file

@ -360,3 +360,34 @@ def test_o1_parallel_tool_calls(model):
parallel_tool_calls=True,
drop_params=True,
)
def test_openai_chat_completion_streaming_handler_reasoning_content():
    """chunk_parser must surface deepseek's 'reasoning_content' on the parsed delta."""
    from unittest.mock import MagicMock

    from litellm.llms.openai.chat.gpt_transformation import (
        OpenAIChatCompletionStreamingHandler,
    )

    raw_chunk = {
        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
        "object": "chat.completion.chunk",
        "created": 1741037890,
        "model": "deepseek-reasoner",
        "system_fingerprint": "fp_5417b77867_prod0225",
        "choices": [
            {
                "index": 0,
                "delta": {"content": None, "reasoning_content": "."},
                "logprobs": None,
                "finish_reason": None,
            }
        ],
    }

    handler = OpenAIChatCompletionStreamingHandler(
        streaming_response=MagicMock(),
        sync_stream=True,
    )
    parsed = handler.chunk_parser(chunk=raw_chunk)

    assert parsed.choices[0].delta.reasoning_content == "."