diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 8cefc4ac6..025ea8120 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -3251,7 +3251,13 @@ def test_unit_test_custom_stream_wrapper():
         litellm.REPEATED_STREAMING_CHUNK_LIMIT - 1,
     ],
 )
-def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
+@pytest.mark.parametrize(
+    "chunk_value, expected_chunk_fail",
+    [("How are you?", True), ("{", False), ("", False), (None, False)],
+)
+def test_unit_test_custom_stream_wrapper_repeating_chunk(
+    loop_amount, chunk_value, expected_chunk_fail
+):
     """
     Test if InternalServerError raised if model enters infinite loop
 
@@ -3269,7 +3275,7 @@ def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
             "choices": [
                 {
                     "index": 0,
-                    "delta": {"content": "How are you?"},
+                    "delta": {"content": chunk_value},
                     "finish_reason": "stop",
                 }
             ],
@@ -3294,7 +3300,9 @@ def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
         ),
     )
 
-    if loop_amount > litellm.REPEATED_STREAMING_CHUNK_LIMIT:
+    print(f"expected_chunk_fail: {expected_chunk_fail}")
+
+    if (loop_amount > litellm.REPEATED_STREAMING_CHUNK_LIMIT) and expected_chunk_fail:
         with pytest.raises(litellm.InternalServerError):
             for chunk in response:
                 continue
diff --git a/litellm/utils.py b/litellm/utils.py
index ef77b5a69..49528d0f7 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8662,14 +8662,19 @@ class CustomStreamWrapper:
 
             # Check if all extracted contents are identical
             if all(content == last_contents[0] for content in last_contents):
-                # All last n chunks are identical
-                raise litellm.InternalServerError(
-                    message="The model is repeating the same chunk = {}.".format(
-                        last_contents[0]
-                    ),
-                    model="",
-                    llm_provider="",
-                )
+                if (
+                    last_contents[0] is not None
+                    and isinstance(last_contents[0], str)
+                    and len(last_contents[0]) > 2
+                ):  # ignore empty content - https://github.com/BerriAI/litellm/issues/5158#issuecomment-2287156946
+                    # All last n chunks are identical
+                    raise litellm.InternalServerError(
+                        message="The model is repeating the same chunk = {}.".format(
+                            last_contents[0]
+                        ),
+                        model="",
+                        llm_provider="",
+                    )
 
     def check_special_tokens(self, chunk: str, finish_reason: Optional[str]):
         """
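Note on the behavior this patch introduces: the repeated-chunk safety check in utils.py now raises only when the repeated content is a meaningful string, so streams that repeat empty strings, None deltas, or very short tokens such as "{" are tolerated, matching the new test matrix. A minimal standalone sketch of that guard follows; the helper name check_repeated_chunks, the local InternalServerError class, and the limit value are illustrative assumptions for this sketch, not litellm's actual API.

    # Minimal sketch of the guard added in the utils.py hunk above.
    # Names and the limit value below are assumptions for illustration only.
    from typing import List, Optional

    REPEATED_STREAMING_CHUNK_LIMIT = 100  # assumed limit for illustration


    class InternalServerError(Exception):
        """Stand-in for litellm.InternalServerError in this sketch."""


    def check_repeated_chunks(last_contents: List[Optional[str]]) -> None:
        """Raise only if the last N chunk contents are identical and non-trivial."""
        if len(last_contents) < REPEATED_STREAMING_CHUNK_LIMIT:
            return
        if all(content == last_contents[0] for content in last_contents):
            # Ignore empty or very short repeated content ("", "{", None),
            # per the issue comment linked in the patch.
            if (
                last_contents[0] is not None
                and isinstance(last_contents[0], str)
                and len(last_contents[0]) > 2
            ):
                raise InternalServerError(
                    "The model is repeating the same chunk = {}.".format(last_contents[0])
                )


    # Mirrors the test matrix above: only the long repeated string raises.
    check_repeated_chunks(["{"] * (REPEATED_STREAMING_CHUNK_LIMIT + 1))   # no error
    check_repeated_chunks([None] * (REPEATED_STREAMING_CHUNK_LIMIT + 1))  # no error
    try:
        check_repeated_chunks(["How are you?"] * (REPEATED_STREAMING_CHUNK_LIMIT + 1))
    except InternalServerError as err:
        print(err)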