diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 237d3895d..3b60896d4 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -235,6 +235,259 @@ def test_completion_azure_stream_special_char():
         assert len(response_str) > 0
 
 
+def test_completion_azure_stream_content_filter_no_delta():
+    """
+    Tests streaming from Azure when some chunks have no delta because they
+    carry only content-filter results.
+    """
+    try:
+        chunks = [
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "This"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " is"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " a"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " dummy"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": " response"
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
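+            # Azure content-filter annotation chunk: id/model/object are empty
+            # strings and the choice below has no "delta" key at all.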
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 35159,
+                            "start_offset": 35159,
+                            "end_offset": 36150
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "."
+                        },
+                        "finish_reason": None,
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "chatcmpl-9SQxdH5hODqkWyJopWlaVOOUnFwlj",
+                "choices": [
+                    {
+                        "delta": {},
+                        "finish_reason": "stop",
+                        "index": 0
+                    }
+                ],
+                "created": 1716563849,
+                "model": "gpt-4o-2024-05-13",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "fp_5f4bad809a"
+            },
+            {
+                "id": "",
+                "choices": [
+                    {
+                        "finish_reason": None,
+                        "index": 0,
+                        "content_filter_offsets": {
+                            "check_offset": 36150,
+                            "start_offset": 36060,
+                            "end_offset": 37029
+                        },
+                        "content_filter_results": {
+                            "hate": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "self_harm": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "sexual": {
+                                "filtered": False,
+                                "severity": "safe"
+                            },
+                            "violence": {
+                                "filtered": False,
+                                "severity": "safe"
+                            }
+                        }
+                    }
+                ],
+                "created": 0,
+                "model": "",
+                "object": ""
+            }
+        ]
+
+        chunk_list = []
+        for chunk in chunks:
+            new_chunk = litellm.ModelResponse(stream=True, id=chunk["id"])
+            if "choices" in chunk and isinstance(chunk["choices"], list):
+                new_choices = []
+                for choice in chunk["choices"]:
+                    if isinstance(choice, litellm.utils.StreamingChoices):
+                        _new_choice = choice
+                    elif isinstance(choice, dict):
+                        _new_choice = litellm.utils.StreamingChoices(**choice)
+                    new_choices.append(_new_choice)
+                new_chunk.choices = new_choices
+            chunk_list.append(new_chunk)
+
+        completion_stream = ModelResponseListIterator(model_responses=chunk_list)
+
+        litellm.set_verbose = True
+
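+        # Replay the canned chunks through CustomStreamWrapper; the
+        # "cached_response" provider iterates the list without a network call.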
+        response = litellm.CustomStreamWrapper(
+            completion_stream=completion_stream,
+            model="gpt-4-0613",
+            custom_llm_provider="cached_response",
+            logging_obj=litellm.Logging(
+                model="gpt-4-0613",
+                messages=[{"role": "user", "content": "Hey"}],
+                stream=True,
+                call_type="completion",
+                start_time=time.time(),
+                litellm_call_id="12345",
+                function_id="1245",
+            ),
+        )
+
+        complete_response = ""
+        for idx, chunk in enumerate(response):
+            delta = chunk.choices[0].delta
+            content = delta.content if delta else None
+            complete_response += content or ""
+            if chunk.choices[0].finish_reason is not None:
+                break
+        assert len(complete_response) > 0
+
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
+
+
 def test_completion_cohere_stream_bad_key():
     try:
         litellm.cache = None
diff --git a/litellm/utils.py b/litellm/utils.py
index 0f2a46f68..880c701db 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -10646,7 +10646,8 @@ class CustomStreamWrapper:
             data_json = json.loads(chunk[5:])  # chunk.startswith("data:"):
             try:
                 if len(data_json["choices"]) > 0:
-                    text = data_json["choices"][0]["delta"].get("content", "")
+                    delta = data_json["choices"][0]["delta"]
+                    text = "" if delta is None else delta.get("content", "")
                     if data_json["choices"][0].get("finish_reason", None):
                         is_finished = True
                         finish_reason = data_json["choices"][0]["finish_reason"]
@@ -11414,12 +11415,14 @@ class CustomStreamWrapper:
                     model_response.id = original_chunk.id
                     self.response_id = original_chunk.id
                     if len(original_chunk.choices) > 0:
+                        delta = original_chunk.choices[0].delta
                         if (
-                            original_chunk.choices[0].delta.function_call is not None
-                            or original_chunk.choices[0].delta.tool_calls is not None
+                            delta is not None and (
+                                delta.function_call is not None
+                                or delta.tool_calls is not None
+                            )
                         ):
                             try:
-                                delta = original_chunk.choices[0].delta
                                 model_response.system_fingerprint = (
                                     original_chunk.system_fingerprint
                                 )
@@ -11478,7 +11481,7 @@ class CustomStreamWrapper:
                                 model_response.choices[0].delta = Delta()
                         else:
                             try:
-                                delta = dict(original_chunk.choices[0].delta)
+                                delta = dict() if original_chunk.choices[0].delta is None else dict(original_chunk.choices[0].delta)
                                 print_verbose(f"original delta: {delta}")
                                 model_response.choices[0].delta = Delta(**delta)
                                 print_verbose(