diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 239dae94db..28a9f9902e 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -465,6 +465,7 @@ def test_completion_mistral_api_stream():
 def test_completion_deep_infra_stream():
     # deep infra currently includes role in the 2nd chunk
     # waiting for them to make a fix on this
+    litellm.set_verbose = True
     try:
         messages = [
             {"role": "system", "content": "You are a helpful assistant."},
diff --git a/litellm/utils.py b/litellm/utils.py
index d3107d758a..95ddb433eb 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8048,6 +8048,7 @@ class CustomStreamWrapper:
                 if len(original_chunk.choices) > 0:
                     try:
                         delta = dict(original_chunk.choices[0].delta)
+                        print_verbose(f"original delta: {delta}")
                         model_response.choices[0].delta = Delta(**delta)
                     except Exception as e:
                         model_response.choices[0].delta = Delta()
@@ -8056,9 +8057,21 @@ class CustomStreamWrapper:
                     model_response.system_fingerprint = (
                         original_chunk.system_fingerprint
                     )
+                    print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
                     if self.sent_first_chunk == False:
                         model_response.choices[0].delta["role"] = "assistant"
                         self.sent_first_chunk = True
+                    elif self.sent_first_chunk == True and hasattr(
+                        model_response.choices[0].delta, "role"
+                    ):
+                        _initial_delta = model_response.choices[
+                            0
+                        ].delta.model_dump()
+                        _initial_delta.pop("role", None)
+                        model_response.choices[0].delta = Delta(**_initial_delta)
+                        print_verbose(
+                            f"model_response.choices[0].delta: {model_response.choices[0].delta}"
+                        )
                 else:
                     ## else
                     completion_obj["content"] = model_response_str
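
For context, a minimal consumer-side sketch (not part of the diff) of the behavior the litellm/utils.py hunk enforces: the wrapper injects role="assistant" on the first streamed chunk and strips any role a provider re-sends on later chunks (DeepInfra currently re-sends it in its second chunk). The model name and assertions here are illustrative assumptions, not part of this PR.

```python
import litellm

litellm.set_verbose = True

# model name is an illustrative assumption; any streaming-capable model works
response = litellm.completion(
    model="deepinfra/meta-llama/Llama-2-70b-chat-hf",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    stream=True,
)

first_chunk = True
for chunk in response:
    delta = chunk.choices[0].delta
    if first_chunk:
        # CustomStreamWrapper sets role="assistant" on the first chunk only
        assert delta.role == "assistant"
        first_chunk = False
    else:
        # after this fix, later deltas are rebuilt without the role field,
        # even if the provider's raw chunk included one
        assert getattr(delta, "role", None) is None
```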