diff --git a/dist/litellm-0.1.651.tar.gz b/dist/litellm-0.1.651.tar.gz
new file mode 100644
index 000000000..ba1bfbb4f
Binary files /dev/null and b/dist/litellm-0.1.651.tar.gz differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index b1f1c387f..7e2dcf510 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 8728d4031..3d162ff26 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -123,7 +123,7 @@ def test_openai_chat_completion_call():
             #     raise Exception("invalid role")
             if "content" in chunk["choices"][0]["delta"]:
                 complete_response += chunk["choices"][0]["delta"]["content"]
-            print(f'complete_chunk: {complete_response}')
+            # print(f'complete_chunk: {complete_response}')
         if complete_response.strip() == "":
             raise Exception("Empty response received")
     except:
diff --git a/litellm/utils.py b/litellm/utils.py
index 7e688c558..220ec8278 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2453,6 +2453,8 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
+            # return this for all models
+            model_response = ModelResponse(stream=True)
             completion_obj = {"content": ""}  # default to role being assistant
             if self.model in litellm.anthropic_models:
                 chunk = next(self.completion_stream)
@@ -2497,21 +2499,23 @@
                 completion_obj["content"] = self.handle_cohere_chunk(chunk)
             else:  # openai chat/azure models
                 chunk = next(self.completion_stream)
-                completion_obj = chunk["choices"][0]["delta"]
+                model_response = chunk
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                return model_response
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
-            # return this for all models
-            model_response = ModelResponse(stream=True)
-            model_response.choices[0].delta = {
-                "content": completion_obj["content"],
-            }
-            if "role" in completion_obj:
-                model_response.choices[0].delta = completion_obj
+
+            if model_response.choices[0].delta.content == "":
+                model_response.choices[0].delta = {
+                    "content": completion_obj["content"],
+                }
             return model_response
         except StopIteration:
             raise StopIteration
         except Exception as e:
+            print(e)
             model_response = ModelResponse(stream=True)
             model_response.choices[0].finish_reason = "stop"
             return model_response
diff --git a/pyproject.toml b/pyproject.toml
index 90a294583..9bd13f4a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.650"
+version = "0.1.651"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"