diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 92b798d84..b939d6299 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -2018,12 +2018,24 @@ def test_openai_stream_options_call():
     """

     assert last_chunk.usage is not None
+    assert isinstance(last_chunk.usage, litellm.Usage)
     assert last_chunk.usage.total_tokens > 0
     assert last_chunk.usage.prompt_tokens > 0
     assert last_chunk.usage.completion_tokens > 0

     # assert all non last chunks have usage=None
-    assert all(chunk.usage is None for chunk in chunks[:-1])
+    # Improved assertion with detailed error message
+    non_last_chunks_with_usage = [
+        chunk
+        for chunk in chunks[:-1]
+        if hasattr(chunk, "usage") and chunk.usage is not None
+    ]
+    assert (
+        not non_last_chunks_with_usage
+    ), f"Non-last chunks with usage not None:\n" + "\n".join(
+        f"Chunk ID: {chunk.id}, Usage: {chunk.usage}, Content: {chunk.choices[0].delta.content}"
+        for chunk in non_last_chunks_with_usage
+    )


 def test_openai_stream_options_call_text_completion():
diff --git a/litellm/utils.py b/litellm/utils.py
index 4dcee6be4..a8e872bd2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -680,12 +680,6 @@ class ModelResponse(OpenAIObject):
             usage = usage
         elif stream is None or stream == False:
             usage = Usage()
-        elif (
-            stream == True
-            and stream_options is not None
-            and stream_options.get("include_usage") == True
-        ):
-            usage = Usage()

         if hidden_params:
             self._hidden_params = hidden_params
@@ -11107,8 +11101,7 @@ class CustomStreamWrapper:
         model_response.system_fingerprint = self.system_fingerprint
         model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response

     def is_delta_empty(self, delta: Delta) -> bool:
@@ -11463,8 +11456,13 @@ class CustomStreamWrapper:
             if (
                 self.stream_options is not None
                 and self.stream_options["include_usage"] == True
+                and response_obj["usage"] is not None
             ):
-                model_response.usage = response_obj["usage"]
+                model_response.usage = litellm.Usage(
+                    prompt_tokens=response_obj["usage"].prompt_tokens,
+                    completion_tokens=response_obj["usage"].completion_tokens,
+                    total_tokens=response_obj["usage"].total_tokens,
+                )
             model_response.model = self.model
             print_verbose(