fix(utils.py): ignore none chunk in stream infinite loop check

Fixes https://github.com/BerriAI/litellm/issues/5158#issuecomment-2287156946

parent b24da18d2d
commit 3a1b3227d8
2 changed files with 24 additions and 11 deletions
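
Summary: CustomStreamWrapper's repeated-chunk safety check previously raised litellm.InternalServerError whenever the last N streamed chunks were identical. After this change the error is only raised when the repeated content is a non-trivial string; None, the empty string, and fragments of two characters or fewer (e.g. "{") no longer trip the check. The unit test is parametrized over those chunk values accordingly.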
@@ -3251,7 +3251,13 @@ def test_unit_test_custom_stream_wrapper():
         litellm.REPEATED_STREAMING_CHUNK_LIMIT - 1,
     ],
 )
-def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
+@pytest.mark.parametrize(
+    "chunk_value, expected_chunk_fail",
+    [("How are you?", True), ("{", False), ("", False), (None, False)],
+)
+def test_unit_test_custom_stream_wrapper_repeating_chunk(
+    loop_amount, chunk_value, expected_chunk_fail
+):
     """
     Test if InternalServerError raised if model enters infinite loop
     """
@@ -3269,7 +3275,7 @@ def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
         "choices": [
             {
                 "index": 0,
-                "delta": {"content": "How are you?"},
+                "delta": {"content": chunk_value},
                 "finish_reason": "stop",
             }
         ],
@@ -3294,7 +3300,9 @@ def test_unit_test_custom_stream_wrapper_repeating_chunk(loop_amount):
         ),
     )
 
-    if loop_amount > litellm.REPEATED_STREAMING_CHUNK_LIMIT:
+    print(f"expected_chunk_fail: {expected_chunk_fail}")
+
+    if (loop_amount > litellm.REPEATED_STREAMING_CHUNK_LIMIT) and expected_chunk_fail:
         with pytest.raises(litellm.InternalServerError):
             for chunk in response:
                 continue
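To make the new parametrization concrete, here is a minimal, self-contained sketch of the stream the test simulates; the helper names make_chunk and repeated_chunks are illustrative, not taken from the test file, and any chunk fields beyond "choices" that the real test sets are omitted:

def make_chunk(chunk_value):
    # Raw chunk dict shaped like the one in the diff above.
    return {
        "choices": [
            {
                "index": 0,
                "delta": {"content": chunk_value},
                "finish_reason": "stop",
            }
        ]
    }


def repeated_chunks(chunk_value, loop_amount):
    # Yield the same chunk loop_amount times, simulating a model stuck in a loop.
    for _ in range(loop_amount):
        yield make_chunk(chunk_value)


# The four parametrized cases: only a meaningful repeated string is expected
# to trigger the infinite-loop error once loop_amount exceeds the limit.
cases = [("How are you?", True), ("{", False), ("", False), (None, False)]

The corresponding change in utils.py (named in the commit title) follows.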
@@ -8662,14 +8662,19 @@ class CustomStreamWrapper:
 
             # Check if all extracted contents are identical
             if all(content == last_contents[0] for content in last_contents):
-                # All last n chunks are identical
-                raise litellm.InternalServerError(
-                    message="The model is repeating the same chunk = {}.".format(
-                        last_contents[0]
-                    ),
-                    model="",
-                    llm_provider="",
-                )
+                if (
+                    last_contents[0] is not None
+                    and isinstance(last_contents[0], str)
+                    and len(last_contents[0]) > 2
+                ):  # ignore empty content - https://github.com/BerriAI/litellm/issues/5158#issuecomment-2287156946
+                    # All last n chunks are identical
+                    raise litellm.InternalServerError(
+                        message="The model is repeating the same chunk = {}.".format(
+                            last_contents[0]
+                        ),
+                        model="",
+                        llm_provider="",
+                    )
 
     def check_special_tokens(self, chunk: str, finish_reason: Optional[str]):
         """
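The new guard can also be read in isolation. A minimal runnable sketch, assuming nothing about litellm beyond the diff above (the function name should_raise_for_repeats is mine, not litellm's actual method):

def should_raise_for_repeats(last_contents):
    # True only when every chunk content is identical AND the repeated
    # content is a meaningful string. None, "", and fragments of two
    # characters or fewer are ignored, per
    # https://github.com/BerriAI/litellm/issues/5158#issuecomment-2287156946
    if not all(content == last_contents[0] for content in last_contents):
        return False  # chunks differ, no loop detected
    head = last_contents[0]
    return head is not None and isinstance(head, str) and len(head) > 2


assert should_raise_for_repeats(["How are you?"] * 5) is True
assert should_raise_for_repeats(["{"] * 5) is False   # short fragment
assert should_raise_for_repeats([""] * 5) is False    # empty content
assert should_raise_for_repeats([None] * 5) is False  # no delta content

The len(...) > 2 threshold is the commit's own heuristic: very short fragments such as "{" can legitimately repeat in streamed output, and the linked issue reports empty-content chunks doing exactly that, so only longer identical runs are treated as an infinite loop.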