Merge pull request #3812 from afbarbaro/main
Fix issue with delta being None when Deferred / Async Content Filter is enabled on Azure OpenAI
commit 391a31c0ce
2 changed files with 261 additions and 5 deletions
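Context for the hunks below: when Azure's Deferred / Async Content Filter is enabled, the service interleaves annotation-only chunks into the stream (they carry `content_filter_results` and offsets but no `delta`), and the pre-fix code called `.get()` or `dict()` on that missing delta. What follows is a minimal, standalone sketch of the guard pattern the diff applies; the function name and payload shape are illustrative, not litellm's API:

from typing import Optional


def text_from_chunk(chunk: dict) -> str:
    """Extract streamed text while tolerating annotation-only chunks.

    Azure's async content filter emits chunks that hold only
    content_filter_results/offsets, with no delta at all.
    """
    choices = chunk.get("choices") or []
    if not choices:
        return ""
    delta: Optional[dict] = choices[0].get("delta")
    # The fix in a nutshell: treat a missing/None delta as empty text
    # instead of calling .get() on None.
    return "" if delta is None else delta.get("content", "") or ""


# An annotation-only chunk, shaped like the ones in the test below:
filter_chunk = {
    "choices": [
        {
            "finish_reason": None,
            "index": 0,
            "content_filter_results": {"hate": {"filtered": False, "severity": "safe"}},
        }
    ]
}
print(text_from_chunk(filter_chunk))  # -> "" (no crash)
print(text_from_chunk({"choices": [{"delta": {"content": "Hi"}}]}))  # -> "Hi"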
@@ -235,6 +235,259 @@ def test_completion_azure_stream_special_char():
     assert len(response_str) > 0


+def test_completion_azure_stream_content_filter_no_delta():
+    """
+    Tests streaming from Azure when the chunks have no delta because they represent the filtered content
+    """
+    try:
+        chunks = [
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "This"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " is"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " a"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " dummy"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " response"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 35159,
+                            "start_offset": 35159,
+                            "end_offset": 36150
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "."
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {},
+                        "finish_reason": "stop",
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 36150,
+                            "start_offset": 36060,
+                            "end_offset": 37029
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            }
+        ]
+
+        chunk_list = []
+        for chunk in chunks:
+            new_chunk = litellm.ModelResponse(stream=True, id=chunk["id"])
+            if "choices" in chunk and isinstance(chunk["choices"], list):
+                new_choices = []
+                for choice in chunk["choices"]:
+                    if isinstance(choice, litellm.utils.StreamingChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = litellm.utils.StreamingChoices(**choice)
+                    new_choices.append(_new_choice)
+                new_chunk.choices = new_choices
+            chunk_list.append(new_chunk)
+
+        completion_stream = ModelResponseListIterator(model_responses=chunk_list)
+
+        litellm.set_verbose = True
+
+        response = litellm.CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model="gpt-4-0613",
+            custom_llm_provider="cached_response",
+            logging_obj=litellm.Logging(
+                model="gpt-4-0613",
+                messages=[{"role": "user", "content": "Hey"}],
+                stream=True,
+                call_type="completion",
+                start_time=time.time(),
+                litellm_call_id="12345",
+                function_id="1245",
+            ),
+        )
+
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            # print
+            delta = chunk.choices[0].delta
+            content = delta.content if delta else None
+            complete_response += content or ""
+            if chunk.choices[0].finish_reason is not None:
+                break
+        assert len(complete_response) > 0
+
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_completion_cohere_stream_bad_key():
     try:
         litellm.cache = None
@@ -10646,7 +10646,8 @@ class CustomStreamWrapper:
             data_json = json.loads(chunk[5:])  # chunk.startswith("data:"):
             try:
                 if len(data_json["choices"]) > 0:
-                    text = data_json["choices"][0]["delta"].get("content", "")
+                    delta = data_json["choices"][0]["delta"]
+                    text = "" if delta is None else delta.get("content", "")
                     if data_json["choices"][0].get("finish_reason", None):
                         is_finished = True
                         finish_reason = data_json["choices"][0]["finish_reason"]
@@ -11414,12 +11415,14 @@ class CustomStreamWrapper:
                 model_response.id = original_chunk.id
                 self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
+                    delta = original_chunk.choices[0].delta
                     if (
-                        original_chunk.choices[0].delta.function_call is not None
-                        or original_chunk.choices[0].delta.tool_calls is not None
+                        delta is not None and (
+                            delta.function_call is not None
+                            or delta.tool_calls is not None
+                        )
                     ):
                         try:
-                            delta = original_chunk.choices[0].delta
                             model_response.system_fingerprint = (
                                 original_chunk.system_fingerprint
                             )
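A note on the hunk above: it hoists `delta` out of the condition and relies on `and` short-circuiting, so when `delta` is `None` the attribute reads on the right-hand side never execute. A standalone illustration with a toy class (not litellm's `Delta`):

class Delta:
    def __init__(self, function_call=None, tool_calls=None):
        self.function_call = function_call
        self.tool_calls = tool_calls


def has_tool_activity(delta):
    # Short-circuit: with delta=None the attribute accesses are never
    # evaluated, so no AttributeError can be raised.
    return delta is not None and (
        delta.function_call is not None or delta.tool_calls is not None
    )


print(has_tool_activity(None))                    # False
print(has_tool_activity(Delta()))                 # False
print(has_tool_activity(Delta(tool_calls=[{}])))  # True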
@@ -11478,7 +11481,7 @@ class CustomStreamWrapper:
                             model_response.choices[0].delta = Delta()
                         else:
                             try:
-                                delta = dict(original_chunk.choices[0].delta)
+                                delta = dict() if original_chunk.choices[0].delta is None else dict(original_chunk.choices[0].delta)
                                 print_verbose(f"original delta: {delta}")
                                 model_response.choices[0].delta = Delta(**delta)
                                 print_verbose(
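And a footnote on the final hunk: `dict(None)` raises `TypeError`, which is why the one-line change short-circuits to an empty dict before converting the delta. A quick standalone demonstration:

# Pre-fix behavior: converting a None delta blows up.
delta_obj = None
try:
    dict(delta_obj)
except TypeError as err:
    print(f"unguarded: {err}")  # 'NoneType' object is not iterable

# The guarded form from the diff, shown standalone:
delta = dict() if delta_obj is None else dict(delta_obj)
print(delta)  # {}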