diff --git a/litellm/responses/litellm_completion_transformation/streaming_iterator.py b/litellm/responses/litellm_completion_transformation/streaming_iterator.py
index d970746f89..6f2d5bc185 100644
--- a/litellm/responses/litellm_completion_transformation/streaming_iterator.py
+++ b/litellm/responses/litellm_completion_transformation/streaming_iterator.py
@@ -7,15 +7,18 @@ from litellm.responses.litellm_completion_transformation.transformation import (
 )
 from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
 from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
     ResponseCompletedEvent,
     ResponseInputParam,
     ResponsesAPIOptionalRequestParams,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
+from litellm.types.utils import Delta as ChatCompletionDelta
 from litellm.types.utils import (
     ModelResponse,
     ModelResponseStream,
+    StreamingChoices,
     TextCompletionResponse,
 )

@@ -38,7 +41,7 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
         self.responses_api_request: ResponsesAPIOptionalRequestParams = (
             responses_api_request
         )
-        self.collected_chunks: List[ModelResponseStream] = []
+        self.collected_chat_completion_chunks: List[ModelResponseStream] = []
         self.finished: bool = False

     async def __anext__(
@@ -51,7 +54,14 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                 # Get the next chunk from the stream
                 try:
                     chunk = await self.litellm_custom_stream_wrapper.__anext__()
-                    self.collected_chunks.append(chunk)
+                    self.collected_chat_completion_chunks.append(chunk)
+                    response_api_chunk = (
+                        self._transform_chat_completion_chunk_to_response_api_chunk(
+                            chunk
+                        )
+                    )
+                    if response_api_chunk:
+                        return response_api_chunk
                 except StopAsyncIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
@@ -74,28 +84,65 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
         try:
             while True:
                 if self.finished is True:
-                    raise StopAsyncIteration
+                    raise StopIteration
                 # Get the next chunk from the stream
                 try:
                     chunk = self.litellm_custom_stream_wrapper.__next__()
-                    self.collected_chunks.append(chunk)
-                except StopAsyncIteration:
+                    self.collected_chat_completion_chunks.append(chunk)
+                    response_api_chunk = (
+                        self._transform_chat_completion_chunk_to_response_api_chunk(
+                            chunk
+                        )
+                    )
+                    if response_api_chunk:
+                        return response_api_chunk
+                except StopIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
                     if response_completed_event:
                         return response_completed_event
                     else:
-                        raise StopAsyncIteration
+                        raise StopIteration
         except Exception as e:
             # Handle HTTP errors
             self.finished = True
             raise e

+    def _transform_chat_completion_chunk_to_response_api_chunk(
+        self, chunk: ModelResponseStream
+    ) -> Optional[ResponsesAPIStreamingResponse]:
+        """
+        Transform a chat completion chunk into a Responses API chunk.
+
+        This currently only handles emitting the OutputTextDeltaEvent, which is used by other tools consuming the Responses API.
+        """
+        return OutputTextDeltaEvent(
+            type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+            item_id=chunk.id,
+            output_index=0,
+            content_index=0,
+            delta=self._get_delta_string_from_streaming_choices(chunk.choices),
+        )
+
+    def _get_delta_string_from_streaming_choices(
+        self, choices: List[StreamingChoices]
+    ) -> str:
+        """
+        Get the delta string from the streaming choices.
+
+        For now this collects the first choice's delta string.
+
+        It's unclear how users expect litellm to translate multiple choices per chunk into the Responses API output.
+        """
+        choice = choices[0]
+        chat_completion_delta: ChatCompletionDelta = choice.delta
+        return chat_completion_delta.content or ""
+
     def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
         litellm_model_response: Optional[
             Union[ModelResponse, TextCompletionResponse]
-        ] = stream_chunk_builder(chunks=self.collected_chunks)
+        ] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)

         if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
             return ResponseCompletedEvent(
diff --git a/litellm/responses/litellm_completion_transformation/transformation.py b/litellm/responses/litellm_completion_transformation/transformation.py
index b1e52eb8f3..c00d6622bf 100644
--- a/litellm/responses/litellm_completion_transformation/transformation.py
+++ b/litellm/responses/litellm_completion_transformation/transformation.py
@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional, Union
 from openai.types.responses.tool_param import FunctionToolParam

 from litellm.caching import InMemoryCache
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.responses.litellm_completion_transformation.session_handler import (
     ResponsesAPISessionElement,
     SessionHandler,
@@ -88,6 +89,18 @@ class LiteLLMCompletionResponsesConfig:
             "custom_llm_provider": custom_llm_provider,
         }

+        # Responses API `Completed` events require usage, so we pass `stream_options` to litellm.completion to include usage
+        if stream is True:
+            stream_options = {
+                "include_usage": True,
+            }
+            litellm_completion_request["stream_options"] = stream_options
+            litellm_logging_obj: Optional[LiteLLMLoggingObj] = kwargs.get(
+                "litellm_logging_obj"
+            )
+            if litellm_logging_obj:
+                litellm_logging_obj.stream_options = stream_options
+
         # only pass non-None values
         litellm_completion_request = {
             k: v for k, v in litellm_completion_request.items() if v is not None
diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py
index 3039efb9f7..e050c47080 100644
--- a/litellm/responses/streaming_iterator.py
+++ b/litellm/responses/streaming_iterator.py
@@ -11,7 +11,9 @@ from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
+    OutputTextDeltaEvent,
     ResponseCompletedEvent,
+    ResponsesAPIResponse,
     ResponsesAPIStreamEvents,
     ResponsesAPIStreamingResponse,
 )
@@ -212,9 +214,14 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):

 class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
     """
-    mock iterator - some models like o1-pro do not support streaming, we need to fake a stream
+    Mock iterator: fake a stream by slicing the full response text into
+    5-char deltas, then emit a completed event.
+
+    Models like o1-pro don't support streaming, so we fake it.
     """

+    CHUNK_SIZE = 5
+
     def __init__(
         self,
         response: httpx.Response,
@@ -222,49 +229,68 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
         responses_api_provider_config: BaseResponsesAPIConfig,
         logging_obj: LiteLLMLoggingObj,
     ):
-        self.raw_http_response = response
         super().__init__(
             response=response,
             model=model,
             responses_api_provider_config=responses_api_provider_config,
             logging_obj=logging_obj,
         )
-        self.is_done = False
+
+        # one-time transform
+        transformed = (
+            self.responses_api_provider_config.transform_response_api_response(
+                model=self.model,
+                raw_response=response,
+                logging_obj=logging_obj,
+            )
+        )
+        full_text = self._collect_text(transformed)
+
+        # build a list of 5-char delta events
+        deltas = [
+            OutputTextDeltaEvent(
+                type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
+                delta=full_text[i : i + self.CHUNK_SIZE],
+                item_id=transformed.id,
+                output_index=0,
+                content_index=0,
+            )
+            for i in range(0, len(full_text), self.CHUNK_SIZE)
+        ]
+
+        # append the completed event
+        self._events = deltas + [
+            ResponseCompletedEvent(
+                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+                response=transformed,
+            )
+        ]
+        self._idx = 0

     def __aiter__(self):
         return self

     async def __anext__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopAsyncIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt

     def __iter__(self):
         return self

     def __next__(self) -> ResponsesAPIStreamingResponse:
-        if self.is_done:
+        if self._idx >= len(self._events):
             raise StopIteration
-        self.is_done = True
-        transformed_response = (
-            self.responses_api_provider_config.transform_response_api_response(
-                model=self.model,
-                raw_response=self.raw_http_response,
-                logging_obj=self.logging_obj,
-            )
-        )
-        return ResponseCompletedEvent(
-            type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
-            response=transformed_response,
-        )
+        evt = self._events[self._idx]
+        self._idx += 1
+        return evt
+
+    def _collect_text(self, resp: ResponsesAPIResponse) -> str:
+        out = ""
+        for out_item in resp.output:
+            if out_item.type == "message":
+                for c in getattr(out_item, "content", []):
+                    out += c.text
+        return out
diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py
index 884d9bda7b..fd39c13604 100644
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@@ -133,11 +133,13 @@ class BaseResponsesAPITest(ABC):
         validate_responses_api_response(response, final_chunk=True)


-    @pytest.mark.parametrize("sync_mode", [True])
+    @pytest.mark.parametrize("sync_mode", [True, False])
     @pytest.mark.asyncio
     async def test_basic_openai_responses_api_streaming(self, sync_mode):
         litellm._turn_on_debug()
         base_completion_call_args = self.get_base_completion_call_args()
+        collected_content_string = ""
+        response_completed_event = None
         if sync_mode:
             response = litellm.responses(
                 input="Basic ping",
@@ -146,6 +148,10 @@ class BaseResponsesAPITest(ABC):
             )
             for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
         else:
             response = await litellm.aresponses(
                 input="Basic ping",
@@ -154,5 +160,35 @@ class BaseResponsesAPITest(ABC):
             )
             async for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
+
+        # assert the delta chunks carried content, i.e. len(collected_content_string) > 0
+        # this content is typically rendered on chat UIs
+        assert len(collected_content_string) > 0
+
+        # assert the response completed event is not None
+        assert response_completed_event is not None
+
+        # assert the response completed event has a response
+        assert response_completed_event.response is not None
+
+        # assert the response completed event includes the usage
+        assert response_completed_event.response.usage is not None
+
+        # basic sanity checks that the usage seems reasonable
+        print("response_completed_event.response.usage=", response_completed_event.response.usage)
+        assert response_completed_event.response.usage.input_tokens > 0 and response_completed_event.response.usage.input_tokens < 100
+        assert response_completed_event.response.usage.output_tokens > 0 and response_completed_event.response.usage.output_tokens < 1000
+        assert response_completed_event.response.usage.total_tokens > 0 and response_completed_event.response.usage.total_tokens < 1000
+
+        # total tokens should be the sum of input and output tokens
+        assert response_completed_event.response.usage.total_tokens == response_completed_event.response.usage.input_tokens + response_completed_event.response.usage.output_tokens
+
+
+
diff --git a/ui/litellm-dashboard/src/components/chat_ui.tsx b/ui/litellm-dashboard/src/components/chat_ui.tsx
index ae8d15cfe1..6f9801c632 100644
--- a/ui/litellm-dashboard/src/components/chat_ui.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui.tsx
@@ -26,6 +26,7 @@ import {
 import { message, Select, Spin, Typography, Tooltip, Input } from "antd";
 import { makeOpenAIChatCompletionRequest } from "./chat_ui/llm_calls/chat_completion";
 import { makeOpenAIImageGenerationRequest } from "./chat_ui/llm_calls/image_generation";
+import { makeOpenAIResponsesRequest } from "./chat_ui/llm_calls/responses_api";
 import { fetchAvailableModels, ModelGroup } from "./chat_ui/llm_calls/fetch_models";
 import { litellmModeMapping, ModelMode, EndpointType, getEndpointType } from "./chat_ui/mode_endpoint_mapping";
 import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
@@ -137,20 +138,28 @@ const ChatUI: React.FC = ({
   }, [chatHistory]);

   const updateTextUI = (role: string, chunk: string, model?: string) => {
-    setChatHistory((prevHistory) => {
-      const lastMessage = prevHistory[prevHistory.length - 1];
-
-      if (lastMessage && lastMessage.role === role && !lastMessage.isImage) {
+    console.log("updateTextUI called with:", role, chunk, model);
+    setChatHistory((prev) => {
+      const last = prev[prev.length - 1];
+      // if the last message is already from this same role, append
+      if (last && last.role === role && !last.isImage) {
+        // build a new object, but only set `model` if it wasn't there already
+        const updated: MessageType = {
+          ...last,
+          content: last.content + chunk,
+          model: last.model ?? model, // only use the passed-in model on the first chunk
+        };
+        return [...prev.slice(0, -1), updated];
+      } else {
+        // otherwise start a brand new assistant bubble
         return [
-          ...prevHistory.slice(0, prevHistory.length - 1),
-          {
-            ...lastMessage,
-            content: lastMessage.content + chunk,
-            model
+          ...prev,
+          {
+            role,
+            content: chunk,
+            model, // model set exactly once here
           },
         ];
-      } else {
-        return [...prevHistory, { role, content: chunk, model }];
       }
     });
   };
@@ -297,7 +306,6 @@ const ChatUI: React.FC = ({

     try {
       if (selectedModel) {
-        // Use EndpointType enum for comparison
         if (endpointType === EndpointType.CHAT) {
           // Create chat history for API call - strip out model field and isImage field
           const apiChatHistory = [...chatHistory.filter(msg => !msg.isImage).map(({ role, content }) => ({ role, content })), newUserMessage];
@@ -323,6 +331,21 @@ const ChatUI: React.FC = ({
             selectedTags,
             signal
           );
+        } else if (endpointType === EndpointType.RESPONSES) {
+          // Create chat history for API call - strip out model field and isImage field
+          const apiChatHistory = [...chatHistory.filter(msg => !msg.isImage).map(({ role, content }) => ({ role, content })), newUserMessage];
+
+          await makeOpenAIResponsesRequest(
+            apiChatHistory,
+            (role, delta, model) => updateTextUI(role, delta, model),
+            selectedModel,
+            effectiveApiKey,
+            selectedTags,
+            signal,
+            updateReasoningContent,
+            updateTimingData,
+            updateUsageData
+          );
         }
       }
     } catch (error) {
@@ -592,7 +615,7 @@ const ChatUI: React.FC = ({
                   onChange={(e) => setInputMessage(e.target.value)}
                   onKeyDown={handleKeyDown}
                   placeholder={
-                    endpointType === EndpointType.CHAT
+                    endpointType === EndpointType.CHAT || endpointType === EndpointType.RESPONSES
                       ? "Type your message... (Shift+Enter for new line)"
                       : "Describe the image you want to generate..."
                   }
diff --git a/ui/litellm-dashboard/src/components/chat_ui/EndpointSelector.tsx b/ui/litellm-dashboard/src/components/chat_ui/EndpointSelector.tsx
index 49b1df3e97..12d5acbc70 100644
--- a/ui/litellm-dashboard/src/components/chat_ui/EndpointSelector.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/EndpointSelector.tsx
@@ -19,8 +19,9 @@ const EndpointSelector: React.FC = ({
 }) => {
   // Map endpoint types to their display labels
   const endpointOptions = [
-    { value: EndpointType.CHAT, label: '/chat/completions' },
-    { value: EndpointType.IMAGE, label: '/images/generations' }
+    { value: EndpointType.CHAT, label: '/v1/chat/completions' },
+    { value: EndpointType.RESPONSES, label: '/v1/responses' },
+    { value: EndpointType.IMAGE, label: '/v1/images/generations' },
   ];

   return (
diff --git a/ui/litellm-dashboard/src/components/chat_ui/llm_calls/responses_api.tsx b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/responses_api.tsx
new file mode 100644
index 0000000000..744935159b
--- /dev/null
+++ b/ui/litellm-dashboard/src/components/chat_ui/llm_calls/responses_api.tsx
@@ -0,0 +1,131 @@
+import openai from "openai";
+import { message } from "antd";
+import { MessageType } from "../types";
+import { TokenUsage } from "../ResponseMetrics";
+
+export async function makeOpenAIResponsesRequest(
+  messages: MessageType[],
+  updateTextUI: (role: string, delta: string, model?: string) => void,
+  selectedModel: string,
+  accessToken: string | null,
+  tags: string[] = [],
+  signal?: AbortSignal,
+  onReasoningContent?: (content: string) => void,
+  onTimingData?: (timeToFirstToken: number) => void,
+  onUsageData?: (usage: TokenUsage) => void
+) {
+  if (!accessToken) {
+    throw new Error("API key is required");
+  }
+
+  // Base URL should be the current base_url
+  const isLocal = process.env.NODE_ENV === "development";
+  if (isLocal !== true) {
+    console.log = function () {};
+  }
+
+  const proxyBaseUrl = isLocal
+    ? "http://localhost:4000"
+    : window.location.origin;
+
+  const client = new openai.OpenAI({
+    apiKey: accessToken,
+    baseURL: proxyBaseUrl,
+    dangerouslyAllowBrowser: true,
+    defaultHeaders: tags && tags.length > 0 ? { 'x-litellm-tags': tags.join(',') } : undefined,
+  });
+
+  try {
+    const startTime = Date.now();
+    let firstTokenReceived = false;
+
+    // Format messages for the API
+    const formattedInput = messages.map(message => ({
+      role: message.role,
+      content: message.content,
+      type: "message"
+    }));
+
+    // Create request to OpenAI responses API
+    // Use 'any' type to avoid TypeScript issues with the experimental API
+    const response = await (client as any).responses.create({
+      model: selectedModel,
+      input: formattedInput,
+      stream: true,
+    }, { signal });
+
+    for await (const event of response) {
+      console.log("Response event:", event);
+
+      // Use a type-safe approach to handle events
+      if (typeof event === 'object' && event !== null) {
+        // Handle output text deltas
+        // 1) drop any "role" streams
+        if (event.type === "response.role.delta") {
+          continue;
+        }
+
+        // 2) only handle actual text deltas
+        if (event.type === "response.output_text.delta" && typeof event.delta === "string") {
+          const delta = event.delta;
+          console.log("Text delta", delta);
+          // skip pure whitespace/newlines
+          if (delta.trim().length > 0) {
+            updateTextUI("assistant", delta, selectedModel);
+
+            // Calculate time to first token
+            if (!firstTokenReceived) {
+              firstTokenReceived = true;
+              const timeToFirstToken = Date.now() - startTime;
+              console.log("First token received! Time:", timeToFirstToken, "ms");
+
+              if (onTimingData) {
+                onTimingData(timeToFirstToken);
+              }
+            }
+          }
+        }
+
+        // Handle reasoning content
+        if (event.type === "response.reasoning.delta" && 'delta' in event) {
+          const delta = event.delta;
+          if (typeof delta === 'string' && onReasoningContent) {
+            onReasoningContent(delta);
+          }
+        }
+
+        // Handle usage data in the response.completed event
+        if (event.type === "response.completed" && 'response' in event) {
+          const response_obj = event.response;
+          const usage = response_obj.usage;
+          console.log("Usage data:", usage);
+          if (usage && onUsageData) {
+            // Extract usage data safely
+            const usageData: TokenUsage = {
+              completionTokens: usage.output_tokens,
+              promptTokens: usage.input_tokens,
+              totalTokens: usage.total_tokens
+            };
+
+            // Add reasoning tokens if available
+            if (usage.completion_tokens_details?.reasoning_tokens) {
+              usageData.reasoningTokens = usage.completion_tokens_details.reasoning_tokens;
+            }
+
+            onUsageData(usageData);
+          }
+        }
+      }
+    }
+  } catch (error) {
+    if (signal?.aborted) {
+      console.log("Responses API request was cancelled");
+    } else {
+      message.error(`Error occurred while generating model response. Please try again. Error: ${error}`, 20);
+    }
+    throw error; // Re-throw to allow the caller to handle the error
+  }
+}
\ No newline at end of file
diff --git a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
index 0ed0098fac..ea86831842 100644
--- a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
@@ -4,6 +4,7 @@
 export enum ModelMode {
   IMAGE_GENERATION = "image_generation",
   CHAT = "chat",
+  RESPONSES = "responses",
   // add additional modes as needed
 }

@@ -11,6 +12,7 @@ export enum ModelMode {
 export enum EndpointType {
   IMAGE = "image",
   CHAT = "chat",
+  RESPONSES = "responses",
   // add additional endpoint types if required
 }

@@ -18,6 +20,7 @@ export enum ModelMode {
 export const litellmModeMapping: Record<ModelMode, EndpointType> = {
   [ModelMode.IMAGE_GENERATION]: EndpointType.IMAGE,
   [ModelMode.CHAT]: EndpointType.CHAT,
+  [ModelMode.RESPONSES]: EndpointType.RESPONSES,
 };

 export const getEndpointType = (mode: string): EndpointType => {
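Example (illustrative, not part of the patch): a minimal sketch of how the streamed events introduced above can be consumed through the litellm SDK, mirroring test_basic_openai_responses_api_streaming. The model name is an assumption; any Responses-API-capable model configured for litellm behaves the same way.

import litellm

# model name is a placeholder, purely for illustration
response = litellm.responses(
    model="openai/gpt-4o-mini",
    input="Basic ping",
    stream=True,
)

collected_text = ""
completed_event = None

for event in response:
    # OutputTextDeltaEvent chunks, one per chat-completion chunk, emitted by
    # LiteLLMCompletionStreamingIterator._transform_chat_completion_chunk_to_response_api_chunk
    if event.type == "response.output_text.delta":
        collected_text += event.delta
    # ResponseCompletedEvent built from the collected chunks via stream_chunk_builder
    elif event.type == "response.completed":
        completed_event = event

print(collected_text)
if completed_event is not None:
    usage = completed_event.response.usage
    # usage is populated because stream=True now also sets stream_options={"include_usage": True}
    print(usage.input_tokens, usage.output_tokens, usage.total_tokens)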
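For the dashboard path, responses_api.tsx above does roughly the equivalent of the following Python sketch against the LiteLLM proxy. This is an illustration under assumptions: an openai-python release that exposes client.responses, a proxy on http://localhost:4000 (the dashboard's dev default), and placeholder key and model values. It fans the same two event types out the way the UI callbacks do.

from openai import OpenAI

# key and model are placeholders; the base_url matches the dashboard's dev default
client = OpenAI(api_key="sk-litellm-proxy-key", base_url="http://localhost:4000")

stream = client.responses.create(
    model="gpt-4o-mini",
    input=[{"role": "user", "content": "Basic ping", "type": "message"}],
    stream=True,
)

text = ""
usage = None
for event in stream:
    if event.type == "response.output_text.delta":
        text += event.delta           # what updateTextUI appends to the chat bubble
    elif event.type == "response.completed":
        usage = event.response.usage  # what the UI maps into TokenUsage

print(text)
print(usage)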