diff --git a/litellm/main.py b/litellm/main.py
index f5f588f49..f54f70a21 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -144,8 +144,10 @@ from .types.llms.openai import HttpxBinaryResponseContent
 from .types.utils import (
     AdapterCompletionStreamWrapper,
     ChatCompletionMessageToolCall,
+    CompletionTokensDetails,
     FileTypes,
     HiddenParams,
+    PromptTokensDetails,
     all_litellm_params,
 )
 
@@ -5481,7 +5483,13 @@ def stream_chunk_builder(
         chunks=chunks, messages=messages
     )
     role = chunks[0]["choices"][0]["delta"]["role"]
-    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
+    finish_reason = "stop"
+    for chunk in chunks:
+        if "choices" in chunk and len(chunk["choices"]) > 0:
+            if hasattr(chunk["choices"][0], "finish_reason"):
+                finish_reason = chunk["choices"][0].finish_reason
+            elif "finish_reason" in chunk["choices"][0]:
+                finish_reason = chunk["choices"][0]["finish_reason"]
 
     # Initialize the response dictionary
     response = {
@@ -5512,7 +5520,8 @@ def stream_chunk_builder(
     tool_call_chunks = [
         chunk
         for chunk in chunks
-        if "tool_calls" in chunk["choices"][0]["delta"]
+        if len(chunk["choices"]) > 0
+        and "tool_calls" in chunk["choices"][0]["delta"]
         and chunk["choices"][0]["delta"]["tool_calls"] is not None
     ]
 
@@ -5590,7 +5599,8 @@ def stream_chunk_builder(
     function_call_chunks = [
         chunk
         for chunk in chunks
-        if "function_call" in chunk["choices"][0]["delta"]
+        if len(chunk["choices"]) > 0
+        and "function_call" in chunk["choices"][0]["delta"]
         and chunk["choices"][0]["delta"]["function_call"] is not None
     ]
 
@@ -5625,7 +5635,8 @@ def stream_chunk_builder(
     content_chunks = [
         chunk
         for chunk in chunks
-        if "content" in chunk["choices"][0]["delta"]
+        if len(chunk["choices"]) > 0
+        and "content" in chunk["choices"][0]["delta"]
         and chunk["choices"][0]["delta"]["content"] is not None
     ]
 
@@ -5657,6 +5668,8 @@ def stream_chunk_builder(
     ## anthropic prompt caching information ##
     cache_creation_input_tokens: Optional[int] = None
     cache_read_input_tokens: Optional[int] = None
+    completion_tokens_details: Optional[CompletionTokensDetails] = None
+    prompt_tokens_details: Optional[PromptTokensDetails] = None
     for chunk in chunks:
         usage_chunk: Optional[Usage] = None
         if "usage" in chunk:
@@ -5674,6 +5687,26 @@
                 )
             if "cache_read_input_tokens" in usage_chunk:
                 cache_read_input_tokens = usage_chunk.get("cache_read_input_tokens")
+            if hasattr(usage_chunk, "completion_tokens_details"):
+                if isinstance(usage_chunk.completion_tokens_details, dict):
+                    completion_tokens_details = CompletionTokensDetails(
+                        **usage_chunk.completion_tokens_details
+                    )
+                elif isinstance(
+                    usage_chunk.completion_tokens_details, CompletionTokensDetails
+                ):
+                    completion_tokens_details = (
+                        usage_chunk.completion_tokens_details
+                    )
+            if hasattr(usage_chunk, "prompt_tokens_details"):
+                if isinstance(usage_chunk.prompt_tokens_details, dict):
+                    prompt_tokens_details = PromptTokensDetails(
+                        **usage_chunk.prompt_tokens_details
+                    )
+                elif isinstance(
+                    usage_chunk.prompt_tokens_details, PromptTokensDetails
+                ):
+                    prompt_tokens_details = usage_chunk.prompt_tokens_details
 
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
@@ -5700,6 +5733,11 @@
     if cache_read_input_tokens is not None:
         response["usage"]["cache_read_input_tokens"] = cache_read_input_tokens
 
+    if completion_tokens_details is not None:
+        response["usage"]["completion_tokens_details"] = completion_tokens_details
+    if prompt_tokens_details is not None:
+        response["usage"]["prompt_tokens_details"] = prompt_tokens_details
+
     return convert_to_model_response_object(
         response_object=response,
         model_response_object=model_response,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 5f36a359e..f5df0a282 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: gpt-4o-realtime-audio
+  - model_name: gpt-4o
     litellm_params:
       model: azure/gpt-4o-realtime-preview
       api_key: os.environ/AZURE_SWEDEN_API_KEY
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index ab4f27669..c8adf0bdc 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -11,7 +11,7 @@ from openai.types.completion_usage import (
     CompletionUsage,
     PromptTokensDetails,
 )
-from pydantic import ConfigDict, PrivateAttr
+from pydantic import BaseModel, ConfigDict, PrivateAttr
 from typing_extensions import Callable, Dict, Required, TypedDict, override
 
 from ..litellm_core_utils.core_helpers import map_finish_reason
@@ -677,6 +677,8 @@ class ModelResponse(OpenAIObject):
                         _new_choice = choice
                     elif isinstance(choice, dict):
                         _new_choice = StreamingChoices(**choice)
+                    elif isinstance(choice, BaseModel):
+                        _new_choice = StreamingChoices(**choice.model_dump())
                     new_choices.append(_new_choice)
                 choices = new_choices
             else:
diff --git a/litellm/utils.py b/litellm/utils.py
index 2e8ec7c49..7fcc6e6e1 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7813,9 +7813,7 @@ class CustomStreamWrapper:
                 )
             elif isinstance(response_obj["usage"], BaseModel):
                 model_response.usage = litellm.Usage(
-                    prompt_tokens=response_obj["usage"].prompt_tokens,
-                    completion_tokens=response_obj["usage"].completion_tokens,
-                    total_tokens=response_obj["usage"].total_tokens,
+                    **response_obj["usage"].model_dump()
                 )
 
             model_response.model = self.model
diff --git a/tests/local_testing/test_stream_chunk_builder.py b/tests/local_testing/test_stream_chunk_builder.py
index 35ce34c0c..ca3ea0e02 100644
--- a/tests/local_testing/test_stream_chunk_builder.py
+++ b/tests/local_testing/test_stream_chunk_builder.py
@@ -5,6 +5,7 @@ import time
 import traceback
 
 import pytest
+from typing import List
 
 sys.path.insert(
     0, os.path.abspath("../..")
@@ -12,7 +13,6 @@ sys.path.insert(
 import os
 
 import dotenv
-import pytest
 from openai import OpenAI
 
 import litellm
@@ -622,3 +622,46 @@ def test_stream_chunk_builder_multiple_tool_calls():
     assert (
         expected_response.choices == response.choices
     ), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices)
+
+
+def test_stream_chunk_builder_openai_prompt_caching():
+    from openai import OpenAI
+    from pydantic import BaseModel
+
+    client = OpenAI(
+        # This is the default and can be omitted
+        api_key=os.getenv("OPENAI_API_KEY"),
+    )
+
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": "Say this is a test",
+            }
+        ],
+        model="gpt-3.5-turbo",
+        stream=True,
+        stream_options={"include_usage": True},
+    )
+    chunks: List[litellm.ModelResponse] = []
+    usage_obj = None
+    for chunk in chat_completion:
+        chunks.append(litellm.ModelResponse(**chunk.model_dump(), stream=True))
+
+    print(f"chunks: {chunks}")
+
+    usage_obj: litellm.Usage = chunks[-1].usage  # type: ignore
+
+    response = stream_chunk_builder(chunks=chunks)
+    print(f"response: {response}")
+    print(f"response usage: {response.usage}")
+    for k, v in usage_obj.model_dump().items():
+        print(k, v)
+        response_usage_value = getattr(response.usage, k)  # type: ignore
+        print(f"response_usage_value: {response_usage_value}")
+        print(f"type: {type(response_usage_value)}")
+        if isinstance(response_usage_value, BaseModel):
+            assert response_usage_value.model_dump() == v
+        else:
+            assert response_usage_value == v