diff --git a/dist/litellm-0.1.646-py3-none-any.whl b/dist/litellm-0.1.646-py3-none-any.whl
new file mode 100644
index 0000000000..a7cbb10004
Binary files /dev/null and b/dist/litellm-0.1.646-py3-none-any.whl differ
diff --git a/dist/litellm-0.1.646.tar.gz b/dist/litellm-0.1.646.tar.gz
new file mode 100644
index 0000000000..9e6beefec5
Binary files /dev/null and b/dist/litellm-0.1.646.tar.gz differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 8ffdbc4ea1..9384cfc960 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/utils.py b/litellm/utils.py
index f19939c7bb..8ee4e3c48f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2495,13 +2495,15 @@ class CustomStreamWrapper:
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else: # openai chat/azure models
                 chunk = next(self.completion_stream)
-                completion_obj["content"] = chunk["choices"][0]["delta"]["content"]
+                completion_obj = chunk["choices"][0]["delta"]
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
             model_response = ModelResponse(stream=True)
             model_response.choices[0].delta.content = completion_obj["content"]
+            if "role" in completion_obj:
+                model_response.choices[0].delta.role = completion_obj["role"]
             return model_response
         except StopIteration:
             raise StopIteration
diff --git a/pyproject.toml b/pyproject.toml
index 98a4c39752..1760797fb8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.646"
+version = "0.1.647"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
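
Note on the utils.py hunk: for OpenAI-style chat streams, the wrapper now keeps the whole delta object from the provider chunk rather than just its "content" field, so the "role" that OpenAI sends on the opening chunk is forwarded on the wrapped ModelResponse instead of being dropped. OpenAI emits "role" only on that first streamed delta, which is why the role assignment is guarded with an if. A minimal consumption sketch follows, assuming the standard litellm.completion streaming interface; the model name and prompt are placeholders:

import litellm

# Stream a chat completion; with the change above, the first chunk's
# delta can now carry a role in addition to content.
response = litellm.completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
)
for chunk in response:
    delta = chunk.choices[0].delta
    role = getattr(delta, "role", None)
    if role:  # typically present only on the first chunk
        print(f"[{role}] ", end="")
    print(delta.content or "", end="", flush=True)
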