diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 239dae94db..28a9f9902e 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -465,6 +465,7 @@ def test_completion_mistral_api_stream():
 def test_completion_deep_infra_stream():
     # deep infra currently includes role in the 2nd chunk
     # waiting for them to make a fix on this
+    litellm.set_verbose = True
     try:
         messages = [
             {"role": "system", "content": "You are a helpful assistant."},
diff --git a/litellm/utils.py b/litellm/utils.py
index d3107d758a..95ddb433eb 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8048,6 +8048,7 @@ class CustomStreamWrapper:
                 if len(original_chunk.choices) > 0:
                     try:
                         delta = dict(original_chunk.choices[0].delta)
+                        print_verbose(f"original delta: {delta}")
                         model_response.choices[0].delta = Delta(**delta)
                     except Exception as e:
                         model_response.choices[0].delta = Delta()
@@ -8056,9 +8057,21 @@ class CustomStreamWrapper:
                     model_response.system_fingerprint = (
                         original_chunk.system_fingerprint
                     )
+                    print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")
                     if self.sent_first_chunk == False:
                         model_response.choices[0].delta["role"] = "assistant"
                         self.sent_first_chunk = True
+                    elif self.sent_first_chunk == True and hasattr(
+                        model_response.choices[0].delta, "role"
+                    ):
+                        _initial_delta = model_response.choices[
+                            0
+                        ].delta.model_dump()
+                        _initial_delta.pop("role", None)
+                        model_response.choices[0].delta = Delta(**_initial_delta)
+                        print_verbose(
+                            f"model_response.choices[0].delta: {model_response.choices[0].delta}"
+                        )
                 else:
                     ## else
                     completion_obj["content"] = model_response_str
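
For context, a minimal consumer-side sketch (not part of the diff) of the behavior the litellm/utils.py hunk enforces: the wrapper injects role="assistant" on the first streamed chunk and strips any role a provider re-sends on later chunks (DeepInfra currently re-sends it in its second chunk). The model name and assertions here are illustrative assumptions, not part of this PR.

```python
import litellm

litellm.set_verbose = True

# model name is an illustrative assumption; any streaming-capable model works
response = litellm.completion(
    model="deepinfra/meta-llama/Llama-2-70b-chat-hf",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ],
    stream=True,
)

first_chunk = True
for chunk in response:
    delta = chunk.choices[0].delta
    if first_chunk:
        # CustomStreamWrapper sets role="assistant" on the first chunk only
        assert delta.role == "assistant"
        first_chunk = False
    else:
        # after this fix, later deltas are rebuilt without the role field,
        # even if the provider's raw chunk included one
        assert getattr(delta, "role", None) is None
```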