diff --git a/litellm/litellm_core_utils/streaming_handler.py b/litellm/litellm_core_utils/streaming_handler.py
index a1d29f3284..4ce27bfeca 100644
--- a/litellm/litellm_core_utils/streaming_handler.py
+++ b/litellm/litellm_core_utils/streaming_handler.py
@@ -755,16 +755,12 @@ class CustomStreamWrapper:
                 setattr(model_response, k, v)
         return model_response
 
-    def return_processed_chunk_logic(  # noqa
+    def is_chunk_non_empty(
         self,
         completion_obj: Dict[str, Any],
         model_response: ModelResponseStream,
         response_obj: Dict[str, Any],
-    ):
-
-        print_verbose(
-            f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
-        )
+    ) -> bool:
         if (
             "content" in completion_obj
             and (
@@ -780,6 +776,10 @@
                 "function_call" in completion_obj
                 and completion_obj["function_call"] is not None
             )
+            or (
+                "reasoning_content" in model_response.choices[0].delta
+                and model_response.choices[0].delta.reasoning_content is not None
+            )
             or (model_response.choices[0].delta.provider_specific_fields is not None)
             or (
                 "provider_specific_fields" in model_response
@@ -789,8 +789,27 @@
                 "provider_specific_fields" in response_obj
                 and response_obj["provider_specific_fields"] is not None
             )
-        ):  # cannot set content of an OpenAI Object to be an empty string
+        ):
+            return True
+        else:
+            return False
+    def return_processed_chunk_logic(  # noqa
+        self,
+        completion_obj: Dict[str, Any],
+        model_response: ModelResponseStream,
+        response_obj: Dict[str, Any],
+    ):
+
+        print_verbose(
+            f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
+        )
+        is_chunk_non_empty = self.is_chunk_non_empty(
+            completion_obj, model_response, response_obj
+        )
+        if (
+            is_chunk_non_empty
+        ):  # cannot set content of an OpenAI Object to be an empty string
             self.safety_checker()
             hold, model_response_str = self.check_special_tokens(
                 chunk=completion_obj["content"],
                 finish_reason=model_response.choices[0].finish_reason,
@@ -806,7 +825,7 @@
                     for choice in original_chunk.choices:
                         try:
                             if isinstance(choice, BaseModel):
-                                choice_json = choice.model_dump()
+                                choice_json = choice.model_dump()  # type: ignore
                                 choice_json.pop(
                                     "finish_reason", None
                                 )  # for mistral etc. which return a value in their last chunk (not-openai compatible).
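For context (not part of the patch): the refactor above extracts the chunk-emptiness check into `is_chunk_non_empty`, and the new `reasoning_content` clause is what keeps DeepSeek-style reasoning chunks from being dropped. A minimal sketch of the failure mode, assuming `ModelResponseStream` fills in default metadata fields (id, created, etc.) when they are omitted:

```python
# Illustration only -- not part of the patch. A deepseek-reasoner chunk carries
# its text in delta.reasoning_content while delta.content stays None, so a
# predicate that only inspects "content" would classify the chunk as empty.
from litellm.types.utils import ModelResponseStream

chunk = ModelResponseStream(
    **{
        "model": "deepseek-reasoner",
        "choices": [
            {"index": 0, "delta": {"content": None, "reasoning_content": "."}}
        ],
    }
)
delta = chunk.choices[0].delta
assert delta.content is None  # the pre-patch content check sees nothing here
assert delta.reasoning_content == "."  # the new clause marks the chunk non-empty
```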
diff --git a/tests/litellm/litellm_core_utils/test_streaming_handler.py b/tests/litellm/litellm_core_utils/test_streaming_handler.py
new file mode 100644
index 0000000000..7595c19155
--- /dev/null
+++ b/tests/litellm/litellm_core_utils/test_streaming_handler.py
@@ -0,0 +1,48 @@
+import json
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+from litellm.types.utils import ModelResponseStream
+
+
+@pytest.fixture
+def initialized_custom_stream_wrapper() -> CustomStreamWrapper:
+    streaming_handler = CustomStreamWrapper(
+        completion_stream=None,
+        model=None,
+        logging_obj=MagicMock(),
+        custom_llm_provider=None,
+    )
+    return streaming_handler
+
+
+def test_is_chunk_non_empty(initialized_custom_stream_wrapper: CustomStreamWrapper):
+    """Unit test if non-empty when reasoning_content is present"""
+    chunk = {
+        "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
+        "object": "chat.completion.chunk",
+        "created": 1741037890,
+        "model": "deepseek-reasoner",
+        "system_fingerprint": "fp_5417b77867_prod0225",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {"content": None, "reasoning_content": "."},
+                "logprobs": None,
+                "finish_reason": None,
+            }
+        ],
+    }
+    assert initialized_custom_stream_wrapper.is_chunk_non_empty(
+        completion_obj=MagicMock(),
+        model_response=ModelResponseStream(**chunk),
+        response_obj=MagicMock(),
+    )
diff --git a/tests/llm_translation/test_openai.py b/tests/llm_translation/test_openai.py
index 2ee1969ddd..172c946636 100644
--- a/tests/llm_translation/test_openai.py
+++ b/tests/llm_translation/test_openai.py
@@ -360,3 +360,34 @@ def test_o1_parallel_tool_calls(model):
         parallel_tool_calls=True,
         drop_params=True,
     )
+
+
+def test_openai_chat_completion_streaming_handler_reasoning_content():
+    from litellm.llms.openai.chat.gpt_transformation import (
+        OpenAIChatCompletionStreamingHandler,
+    )
+    from unittest.mock import MagicMock
+
+    streaming_handler = OpenAIChatCompletionStreamingHandler(
+        streaming_response=MagicMock(),
+        sync_stream=True,
+    )
+    response = streaming_handler.chunk_parser(
+        chunk={
+            "id": "e89b6501-8ac2-464c-9550-7cd3daf94350",
+            "object": "chat.completion.chunk",
+            "created": 1741037890,
+            "model": "deepseek-reasoner",
+            "system_fingerprint": "fp_5417b77867_prod0225",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": None, "reasoning_content": "."},
+                    "logprobs": None,
+                    "finish_reason": None,
+                }
+            ],
+        }
+    )
+
+    assert response.choices[0].delta.reasoning_content == "."
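The end-to-end effect, as a hypothetical usage sketch (not part of the patch; assumes a configured DeepSeek API key, and the printed labels are illustrative):

```python
# Hypothetical usage sketch -- with the fix, reasoning deltas stream through
# instead of being filtered out as "empty" chunks.
import litellm

stream = litellm.completion(
    model="deepseek/deepseek-reasoner",
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        print("reasoning:", delta.reasoning_content)  # arrives before the answer
    if delta.content:
        print("answer:", delta.content)
```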