Mirror of https://github.com/BerriAI/litellm.git
Fix deepseek 'reasoning_content' error (#8963)
* fix(streaming_handler.py): fix deepseek reasoning content streaming. Fixes https://github.com/BerriAI/litellm/issues/8939
* test(test_streaming_handler.py): add unit test for the streaming handler's 'is_chunk_non_empty' function; ensures 'reasoning_content' is handled correctly
Parent: b9bddac776
Commit: 94d28d59e4
3 changed files with 106 additions and 8 deletions
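For context on the failure mode in issue #8939: deepseek-r1 style models stream "thinking" chunks whose delta carries only reasoning_content, with content set to None. The old emptiness check in the streaming handler never inspected reasoning_content, so reasoning-only chunks were not treated as streamable content. A minimal sketch of the offending chunk shape (field values copied from the tests added in this commit):

# Shape of a deepseek-reasoner streaming chunk during the thinking phase:
# "content" is None and the streamed token lives in "reasoning_content".
reasoning_chunk = {
    "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
    "object": "chat.completion.chunk",
    "model": "deepseek-reasoner",
    "choices": [
        {
            "index": 0,
            "delta": {"content": None, "reasoning_content": "."},
            "logprobs": None,
            "finish_reason": None,
        }
    ],
}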
litellm/litellm_core_utils/streaming_handler.py (path per the commit message and the new test's import):

@@ -755,16 +755,12 @@ class CustomStreamWrapper:
             setattr(model_response, k, v)
         return model_response
 
-    def return_processed_chunk_logic(  # noqa
+    def is_chunk_non_empty(
         self,
         completion_obj: Dict[str, Any],
         model_response: ModelResponseStream,
         response_obj: Dict[str, Any],
-    ):
-
-        print_verbose(
-            f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
-        )
+    ) -> bool:
         if (
             "content" in completion_obj
             and (
@@ -780,6 +776,10 @@ class CustomStreamWrapper:
                 "function_call" in completion_obj
                 and completion_obj["function_call"] is not None
             )
+            or (
+                "reasoning_content" in model_response.choices[0].delta
+                and model_response.choices[0].delta.reasoning_content is not None
+            )
             or (model_response.choices[0].delta.provider_specific_fields is not None)
             or (
                 "provider_specific_fields" in model_response
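This `or` branch is the behavioral fix (the long if-condition continues into the next hunk): a delta whose only payload is reasoning_content now counts as non-empty. A minimal sketch of the added condition in isolation, using plain dicts rather than litellm's delta type:

def reasoning_content_present(delta: dict) -> bool:
    # Mirrors the added branch: the key must exist and the value must be non-None.
    return "reasoning_content" in delta and delta["reasoning_content"] is not None

assert reasoning_content_present({"content": None, "reasoning_content": "."})
assert not reasoning_content_present({"content": None})
assert not reasoning_content_present({"reasoning_content": None})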
@@ -789,8 +789,27 @@ class CustomStreamWrapper:
                 "provider_specific_fields" in response_obj
                 and response_obj["provider_specific_fields"] is not None
             )
-        ):  # cannot set content of an OpenAI Object to be an empty string
+        ):
+            return True
+        else:
+            return False
+
+    def return_processed_chunk_logic(  # noqa
+        self,
+        completion_obj: Dict[str, Any],
+        model_response: ModelResponseStream,
+        response_obj: Dict[str, Any],
+    ):
+
+        print_verbose(
+            f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
+        )
+        is_chunk_non_empty = self.is_chunk_non_empty(
+            completion_obj, model_response, response_obj
+        )
+        if (
+            is_chunk_non_empty
+        ):  # cannot set content of an OpenAI Object to be an empty string
             self.safety_checker()
             hold, model_response_str = self.check_special_tokens(
                 chunk=completion_obj["content"],
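Design note: the change is deliberately split into a refactor and a fix. The whole boolean expression moves, unchanged except for the new reasoning_content branch, into is_chunk_non_empty, which returns a plain bool; return_processed_chunk_logic keeps its original behavior (including the print_verbose debug line) and now delegates the emptiness decision. The payoff is testability: the predicate can be exercised directly, as the new unit test below does, without wiring up a real completion stream.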
@@ -806,7 +825,7 @@ class CustomStreamWrapper:
                     for choice in original_chunk.choices:
                         try:
                             if isinstance(choice, BaseModel):
-                                choice_json = choice.model_dump()
+                                choice_json = choice.model_dump()  # type: ignore
                                 choice_json.pop(
                                     "finish_reason", None
                                 )  # for mistral etc. which return a value in their last chunk (not-openai compatible).
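The only other change in this file is the `# type: ignore` added to the model_dump() call. For readers unfamiliar with the surrounding code: model_dump() is pydantic v2's serializer, returning the model as a plain dict, after which the handler can pop finish_reason, which some providers (mistral, per the comment) set on their last chunk in a non-OpenAI-compatible way. A self-contained sketch of the pattern; the StreamChoice model here is an illustrative stand-in, not litellm's type:

from typing import Optional

from pydantic import BaseModel


class StreamChoice(BaseModel):  # illustrative stand-in, not litellm's type
    content: Optional[str] = None
    finish_reason: Optional[str] = "length"


choice_json = StreamChoice(content="hi").model_dump()  # BaseModel -> plain dict
choice_json.pop("finish_reason", None)  # drop the provider-set value, as the handler does
assert choice_json == {"content": "hi"}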
tests/litellm/litellm_core_utils/test_streaming_handler.py (new file, 48 lines):

@@ -0,0 +1,48 @@
+import json
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+from litellm.types.utils import ModelResponseStream
+
+
+@pytest.fixture
+def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
+    streaming_handler = CustomStreamWrapper(
+        completion_stream=None,
+        model=None,
+        logging_obj=MagicMock(),
+        custom_llm_provider=None,
+    )
+    return streaming_handler
+
+
+def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapper):
+    """Unit test if non-empty when reasoning_content is present"""
+    chunk = {
+        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
+        "object": "chat.completion.chunk",
+        "created": 1741037890,
+        "model": "deepseek-reasoner",
+        "system_fingerprint": "fp_5417b77867_prod0225",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": None, "reasoning_content": "."},
+                "logprobs": None,
+                "finish_reason": None,
+            }
+        ],
+    }
+    assert initialized_custom_stream_wrapper.is_chunk_non_empty(
+        completion_obj=MagicMock(),
+        model_response=ModelResponseStream(**chunk),
+        response_obj=MagicMock(),
+    )
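Two details of the new test are worth noting. The fixture builds the wrapper with completion_stream=None and model=None, which appears to be safe here because the predicate under test never consumes the stream; and completion_obj/response_obj are passed as MagicMock() because, for this chunk, the non-empty verdict is decided entirely by delta.reasoning_content. The test runs in isolation with pytest tests/litellm/litellm_core_utils/test_streaming_handler.py -k is_chunk_non_empty.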
Third changed file (its path is not shown in this view; the hunk extends the test file containing test_o1_parallel_tool_calls):

@@ -360,3 +360,34 @@ def test_o1_parallel_tool_calls(model):
         parallel_tool_calls=True,
         drop_params=True,
     )
+
+
+def test_openai_chat_completion_streaming_handler_reasoning_content():
+    from litellm.llms.openai.chat.gpt_transformation import (
+        OpenAIChatCompletionStreamingHandler,
+    )
+    from unittest.mock import MagicMock
+
+    streaming_handler = OpenAIChatCompletionStreamingHandler(
+        streaming_response=MagicMock(),
+        sync_stream=True,
+    )
+    response = streaming_handler.chunk_parser(
+        chunk={
+            "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
+            "object": "chat.completion.chunk",
+            "created": 1741037890,
+            "model": "deepseek-reasoner",
+            "system_fingerprint": "fp_5417b77867_prod0225",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": None, "reasoning_content": "."},
+                    "logprobs": None,
+                    "finish_reason": None,
+                }
+            ],
+        }
+    )
+
+    assert response.choices[0].delta.reasoning_content == "."
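For completeness, a usage sketch of what the fix enables downstream. This is not part of the commit and assumes a configured deepseek API key; once reasoning-only chunks survive the emptiness check, a caller can stream the thinking phase directly:

import litellm

stream = litellm.completion(
    model="deepseek/deepseek-reasoner",  # provider-prefixed model name
    messages=[{"role": "user", "content": "What is 1 + 1?"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    # During the thinking phase content is None and the token arrives here:
    if getattr(delta, "reasoning_content", None) is not None:
        print(delta.reasoning_content, end="", flush=True)
    elif delta.content:
        print(delta.content, end="", flush=True)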