Merge pull request #3812 from afbarbaro/main

Fix issue with delta being None when Deferred / Async Content Filter is enabled on Azure OpenAI
commit 391a31c0ce
Krish Dholakia 2024-05-24 10:05:08 -07:00 committed by GitHub
2 changed files with 261 additions and 5 deletions
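Background on the failure mode: with the Deferred / Async Content Filter enabled, the Azure stream interleaves annotation-only chunks whose choices carry content_filter_offsets / content_filter_results but no delta, so the parsed delta comes back as None and the previous chunk handling raised on attribute access. Below is a minimal sketch of the problem and of the guard pattern this fix applies; the annotation_chunk payload is illustrative, trimmed from the test fixtures added in this PR.

annotation_chunk = {
    "id": "",
    "choices": [
        {
            "index": 0,
            "finish_reason": None,
            # annotation-only choice: filter results, but no "delta" key at all
            "content_filter_results": {
                "hate": {"filtered": False, "severity": "safe"},
            },
        }
    ],
}

choice = annotation_chunk["choices"][0]

# Pre-fix: once this is parsed into a typed chunk, choice.delta is None and
# calling .get("content", "") on None raises AttributeError mid-stream.
# Post-fix guard, mirroring the CustomStreamWrapper changes below:
delta = choice.get("delta")
text = "" if delta is None else delta.get("content", "")
assert text == ""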


@@ -235,6 +235,259 @@ def test_completion_azure_stream_special_char():
         assert len(response_str) > 0
 
 
+def test_completion_azure_stream_content_filter_no_delta():
+    """
+    Tests streaming from Azure when the chunks have no delta because they represent the filtered content
+    """
+    try:
+        chunks = [
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "This"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " is"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " a"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " dummy"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " response"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 35159,
+                            "start_offset": 35159,
+                            "end_offset": 36150
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "."
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {},
+                        "finish_reason": "stop",
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 36150,
+                            "start_offset": 36060,
+                            "end_offset": 37029
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            }
+        ]
+
+        chunk_list = []
+        for chunk in chunks:
+            new_chunk = litellm.ModelResponse(stream=True, id=chunk["id"])
+            if "choices" in chunk and isinstance(chunk["choices"], list):
+                new_choices = []
+                for choice in chunk["choices"]:
+                    if isinstance(choice, litellm.utils.StreamingChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = litellm.utils.StreamingChoices(**choice)
+                    new_choices.append(_new_choice)
+                new_chunk.choices = new_choices
+            chunk_list.append(new_chunk)
+
+        completion_stream = ModelResponseListIterator(model_responses=chunk_list)
+
+        litellm.set_verbose = True
+
+        response = litellm.CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model="gpt-4-0613",
+            custom_llm_provider="cached_response",
+            logging_obj=litellm.Logging(
+                model="gpt-4-0613",
+                messages=[{"role": "user", "content": "Hey"}],
+                stream=True,
+                call_type="completion",
+                start_time=time.time(),
+                litellm_call_id="12345",
+                function_id="1245",
+            ),
+        )
+
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            # annotation-only chunks have no delta, so guard before reading content
+            delta = chunk.choices[0].delta
+            content = delta.content if delta else None
+            complete_response += content or ""
+            if chunk.choices[0].finish_reason is not None:
+                break
+        assert len(complete_response) > 0
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_completion_cohere_stream_bad_key():
     try:
         litellm.cache = None
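
To exercise the new test in isolation, something like the following should work; the file path is an assumption based on the litellm test layout at the time of this PR:

pytest litellm/tests/test_streaming.py -k test_completion_azure_stream_content_filter_no_delta -s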


@@ -10646,7 +10646,8 @@ class CustomStreamWrapper:
                 data_json = json.loads(chunk[5:])  # chunk.startswith("data:"):
                 try:
                     if len(data_json["choices"]) > 0:
-                        text = data_json["choices"][0]["delta"].get("content", "")
+                        delta = data_json["choices"][0]["delta"]
+                        text = "" if delta is None else delta.get("content", "")
                         if data_json["choices"][0].get("finish_reason", None):
                             is_finished = True
                             finish_reason = data_json["choices"][0]["finish_reason"]
@@ -11414,12 +11415,14 @@ class CustomStreamWrapper:
                 model_response.id = original_chunk.id
                 self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
+                    delta = original_chunk.choices[0].delta
                     if (
-                        original_chunk.choices[0].delta.function_call is not None
-                        or original_chunk.choices[0].delta.tool_calls is not None
+                        delta is not None and (
+                            delta.function_call is not None
+                            or delta.tool_calls is not None
+                        )
                     ):
                         try:
-                            delta = original_chunk.choices[0].delta
                             model_response.system_fingerprint = (
                                 original_chunk.system_fingerprint
                             )
@@ -11478,7 +11481,7 @@
                             model_response.choices[0].delta = Delta()
                         else:
                             try:
-                                delta = dict(original_chunk.choices[0].delta)
+                                delta = dict() if original_chunk.choices[0].delta is None else dict(original_chunk.choices[0].delta)
                                 print_verbose(f"original delta: {delta}")
                                 model_response.choices[0].delta = Delta(**delta)
                                 print_verbose(