diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index ea63bbb384..b26f9b7a7d 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index d37cc1c4ef..c577567b77 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -82,7 +82,7 @@ def ai21_completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-
+ai21_completion_call()
 # test on openai completion call
 def test_openai_chat_completion_call():
     try:
@@ -122,7 +122,7 @@ async def completion_call():
         print(f"error occurred: {traceback.format_exc()}")
         pass
 
-asyncio.run(completion_call())
+# asyncio.run(completion_call())
 
 # # test on azure completion call
 # try:
diff --git a/litellm/utils.py b/litellm/utils.py
index e28e9e29aa..d91723eeda 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -87,6 +87,12 @@ class Message(OpenAIObject):
         self.role = role
         self.logprobs = logprobs
 
+class Delta(OpenAIObject):
+    def __init__(self, content="default", logprobs=None, **params):
+        super(Delta, self).__init__(**params)
+        self.content = content
+        self.logprobs = logprobs
+
 
 class Choices(OpenAIObject):
     def __init__(self, finish_reason="stop", index=0, message=Message(), **params):
@@ -95,11 +101,20 @@ class Choices(OpenAIObject):
         self.index = index
         self.message = message
 
+class StreamingChoices(OpenAIObject):
+    def __init__(self, finish_reason="stop", index=0, delta=Delta(), **params):
+        super(StreamingChoices, self).__init__(**params)
+        self.finish_reason = finish_reason
+        self.index = index
+        self.delta = delta
 
 class ModelResponse(OpenAIObject):
-    def __init__(self, choices=None, created=None, model=None, usage=None, **params):
+    def __init__(self, choices=None, created=None, model=None, usage=None, stream=False, **params):
         super(ModelResponse, self).__init__(**params)
-        self.choices = self.choices = choices if choices else [Choices(message=Message())]
+        if stream:
+            self.choices = self.choices = choices if choices else [StreamingChoices()]
+        else:
+            self.choices = self.choices = choices if choices else [Choices()]
         self.created = created
         self.model = model
         self.usage = (
@@ -2274,7 +2289,7 @@ class CustomStreamWrapper:
 
     def __next__(self):
         try:
-            completion_obj = {"role": "assistant", "content": ""}
+            completion_obj = {"content": ""}
             if self.model in litellm.anthropic_models:
                 chunk = next(self.completion_stream)
                 completion_obj["content"] = self.handle_anthropic_chunk(chunk)
@@ -2315,19 +2330,12 @@ class CustomStreamWrapper:
                 return chunk # open ai returns finish_reason, we should just return the openai chunk
                 #completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
-
             # LOGGING
             threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
-            return {
-                "choices":
-                    [
-                        {
-                            "delta": completion_obj,
-                            "finish_reason": None
-                        },
-                    ]
-            }
+            model_response = ModelResponse(stream=True)
+            model_response.choices[0].delta = completion_obj
+            return model_response
         except Exception as e:
             raise StopIteration
 
diff --git a/pyproject.toml b/pyproject.toml
index 9f73e68f10..6347dbbe89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.609"
+version = "0.1.610"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
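Note (not part of the patch): with this change, each chunk yielded by CustomStreamWrapper for the non-OpenAI providers handled in __next__ (e.g. the Anthropic branch shown above) is a ModelResponse built with stream=True, whose choices[0].delta is set to the plain chunk dict; OpenAI/Azure chunks are still returned unmodified. A minimal consumption sketch under those assumptions follows; the model name is chosen only for illustration.

    import litellm

    response = litellm.completion(
        model="claude-instant-1",  # illustrative non-OpenAI model routed through CustomStreamWrapper
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
        stream=True,
    )

    for chunk in response:
        # choices[0].delta is assigned the plain dict {"content": "..."} in __next__,
        # so it is read here with dict-style access rather than as a Delta attribute.
        delta = chunk.choices[0].delta
        print(delta.get("content", ""), end="", flush=True)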