diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 5cd37776d..c86805b14 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,12 +1,12 @@ --- description: "Agents API for creating and interacting with agentic systems. - Main functionalities provided by this API: - - Create agents with specific instructions and ability to use tools. - - Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\". - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). - - Agents can be provided with various shields (see the Safety API for more details). - - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details." +Main functionalities provided by this API: +- Create agents with specific instructions and ability to use tools. +- Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\". +- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). +- Agents can be provided with various shields (see the Safety API for more details). +- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details." sidebar_label: Agents title: Agents --- @@ -17,11 +17,11 @@ title: Agents Agents API for creating and interacting with agentic systems. - Main functionalities provided by this API: - - Create agents with specific instructions and ability to use tools. - - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). - - Agents can be provided with various shields (see the Safety API for more details). - - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. +Main functionalities provided by this API: +- Create agents with specific instructions and ability to use tools. +- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". +- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). +- Agents can be provided with various shields (see the Safety API for more details). +- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..18e5e314d 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. 
- This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." +Note: This API is currently under active development and may undergo changes." sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..1dc479675 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,9 +1,9 @@ --- description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -14,8 +14,8 @@ title: Inference Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. 
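The batches description above is explicit that the API is designed to work with the standard openai client libraries. As a minimal sketch of what that integration looks like against a Llama Stack deployment (the base URL, API key, and input file id below are illustrative assumptions, not values taken from this patch):

from openai import OpenAI

# Assumed local Llama Stack endpoint; adjust base_url for your deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Hypothetical id of a previously uploaded JSONL file of batched requests.
batch = client.batches.create(
    input_file_id="file-abc123",
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch.id, batch.status)

The same client can then poll client.batches.retrieve(batch.id) until the batch completes.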
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 9e28e0f42..9cf75631d 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -8614,6 +8614,12 @@ { "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" + }, { "$ref": "#/components/schemas/OpenAIResponseMessage" } @@ -9031,6 +9037,68 @@ "title": "OpenAIResponseInputToolWebSearch", "description": "Web search tool configuration for OpenAI response inputs." }, + "OpenAIResponseMCPApprovalRequest": { + "type": "object", + "properties": { + "arguments": { + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "server_label": { + "type": "string" + }, + "type": { + "type": "string", + "const": "mcp_approval_request", + "default": "mcp_approval_request" + } + }, + "additionalProperties": false, + "required": [ + "arguments", + "id", + "name", + "server_label", + "type" + ], + "title": "OpenAIResponseMCPApprovalRequest", + "description": "A request for human approval of a tool invocation." + }, + "OpenAIResponseMCPApprovalResponse": { + "type": "object", + "properties": { + "approval_request_id": { + "type": "string" + }, + "approve": { + "type": "boolean" + }, + "type": { + "type": "string", + "const": "mcp_approval_response", + "default": "mcp_approval_response" + }, + "id": { + "type": "string" + }, + "reason": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "approval_request_id", + "approve", + "type" + ], + "title": "OpenAIResponseMCPApprovalResponse", + "description": "A response to an MCP approval request." + }, "OpenAIResponseMessage": { "type": "object", "properties": { @@ -9539,6 +9607,9 @@ }, { "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + }, + { + "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest" } ], "discriminator": { @@ -9549,7 +9620,8 @@ "file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall", "function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall", "mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall", - "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools" + "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools", + "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest" } } }, diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 1c06c74a5..f204277bd 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -6254,6 +6254,8 @@ components: - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseMessage' "OpenAIResponseInputFunctionToolCallOutput": type: object @@ -6548,6 +6550,53 @@ components: title: OpenAIResponseInputToolWebSearch description: >- Web search tool configuration for OpenAI response inputs. 
+ OpenAIResponseMCPApprovalRequest: + type: object + properties: + arguments: + type: string + id: + type: string + name: + type: string + server_label: + type: string + type: + type: string + const: mcp_approval_request + default: mcp_approval_request + additionalProperties: false + required: + - arguments + - id + - name + - server_label + - type + title: OpenAIResponseMCPApprovalRequest + description: >- + A request for human approval of a tool invocation. + OpenAIResponseMCPApprovalResponse: + type: object + properties: + approval_request_id: + type: string + approve: + type: boolean + type: + type: string + const: mcp_approval_response + default: mcp_approval_response + id: + type: string + reason: + type: string + additionalProperties: false + required: + - approval_request_id + - approve + - type + title: OpenAIResponseMCPApprovalResponse + description: A response to an MCP approval request. OpenAIResponseMessage: type: object properties: @@ -6944,6 +6993,7 @@ components: - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest' discriminator: propertyName: type mapping: @@ -6953,6 +7003,7 @@ components: function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' + mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' OpenAIResponseOutputMessageMCPCall: type: object properties: diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py index 591992479..b449ff8c0 100644 --- a/llama_stack/apis/agents/openai_responses.py +++ b/llama_stack/apis/agents/openai_responses.py @@ -276,13 +276,40 @@ class OpenAIResponseOutputMessageMCPListTools(BaseModel): tools: list[MCPListToolsTool] +@json_schema_type +class OpenAIResponseMCPApprovalRequest(BaseModel): + """ + A request for human approval of a tool invocation. + """ + + arguments: str + id: str + name: str + server_label: str + type: Literal["mcp_approval_request"] = "mcp_approval_request" + + +@json_schema_type +class OpenAIResponseMCPApprovalResponse(BaseModel): + """ + A response to an MCP approval request. 
+ """ + + approval_request_id: str + approve: bool + type: Literal["mcp_approval_response"] = "mcp_approval_response" + id: str | None = None + reason: str | None = None + + OpenAIResponseOutput = Annotated[ OpenAIResponseMessage | OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall | OpenAIResponseOutputMessageFunctionToolCall | OpenAIResponseOutputMessageMCPCall - | OpenAIResponseOutputMessageMCPListTools, + | OpenAIResponseOutputMessageMCPListTools + | OpenAIResponseMCPApprovalRequest, Field(discriminator="type"), ] register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput") @@ -725,6 +752,8 @@ OpenAIResponseInput = Annotated[ | OpenAIResponseOutputMessageFileSearchToolCall | OpenAIResponseOutputMessageFunctionToolCall | OpenAIResponseInputFunctionToolCallOutput + | OpenAIResponseMCPApprovalRequest + | OpenAIResponseMCPApprovalResponse | # Fallback to the generic message type as a last resort OpenAIResponseMessage, diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index c632e61aa..a14d9267e 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -238,6 +238,7 @@ class OpenAIResponsesImpl: temperature=temperature, response_format=response_format, ) + ctx.extract_approvals(input) # Create orchestrator and delegate streaming logic response_id = f"resp-{uuid.uuid4()}" diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3e69fa5cd..c8b4adbab 100644 --- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -10,10 +10,12 @@ from typing import Any from llama_stack.apis.agents.openai_responses import ( AllowedToolsFilter, + ApprovalFilter, MCPListToolsTool, OpenAIResponseContentPartOutputText, OpenAIResponseInputTool, OpenAIResponseInputToolMCP, + OpenAIResponseMCPApprovalRequest, OpenAIResponseObject, OpenAIResponseObjectStream, OpenAIResponseObjectStreamResponseCompleted, @@ -117,10 +119,17 @@ class StreamingResponseOrchestrator: raise ValueError("Streaming chunk processor failed to return completion data") current_response = self._build_chat_completion(completion_result_data) - function_tool_calls, non_function_tool_calls, next_turn_messages = self._separate_tool_calls( + function_tool_calls, non_function_tool_calls, approvals, next_turn_messages = self._separate_tool_calls( current_response, messages ) + # add any approval requests required + for tool_call in approvals: + async for evt in self._add_mcp_approval_request( + tool_call.function.name, tool_call.function.arguments, output_messages + ): + yield evt + # Handle choices with no tool calls for choice in current_response.choices: if not (choice.message.tool_calls and self.ctx.response_tools): @@ -164,10 +173,11 @@ class StreamingResponseOrchestrator: # Emit response.completed yield OpenAIResponseObjectStreamResponseCompleted(response=final_response) - def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list]: + def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list, list]: """Separate tool calls into function and non-function categories.""" function_tool_calls = [] 
         non_function_tool_calls = []
+        approvals = []
         next_turn_messages = messages.copy()
 
         for choice in current_response.choices:
@@ -178,9 +188,23 @@ class StreamingResponseOrchestrator:
                 if is_function_tool_call(tool_call, self.ctx.response_tools):
                     function_tool_calls.append(tool_call)
                 else:
-                    non_function_tool_calls.append(tool_call)
+                    if self._approval_required(tool_call.function.name):
+                        approval_response = self.ctx.approval_response(
+                            tool_call.function.name, tool_call.function.arguments
+                        )
+                        if approval_response:
+                            if approval_response.approve:
+                                logger.info(f"Approval granted for {tool_call.id} on {tool_call.function.name}")
+                                non_function_tool_calls.append(tool_call)
+                            else:
+                                logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
+                        else:
+                            logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
+                            approvals.append(tool_call)
+                    else:
+                        non_function_tool_calls.append(tool_call)
 
-        return function_tool_calls, non_function_tool_calls, next_turn_messages
+        return function_tool_calls, non_function_tool_calls, approvals, next_turn_messages
 
     async def _process_streaming_chunks(
         self, completion_result, output_messages: list[OpenAIResponseOutput]
@@ -632,3 +656,46 @@ class StreamingResponseOrchestrator:
             # TODO: Emit mcp_list_tools.failed event if needed
             logger.exception(f"Failed to list MCP tools from {mcp_tool.server_url}: {e}")
             raise
+
+    def _approval_required(self, tool_name: str) -> bool:
+        if tool_name not in self.mcp_tool_to_server:
+            return False
+        mcp_server = self.mcp_tool_to_server[tool_name]
+        if mcp_server.require_approval == "always":
+            return True
+        if mcp_server.require_approval == "never":
+            return False
+        if isinstance(mcp_server.require_approval, ApprovalFilter):
+            if tool_name in mcp_server.require_approval.always:
+                return True
+            if tool_name in mcp_server.require_approval.never:
+                return False
+        return True
+
+    async def _add_mcp_approval_request(
+        self, tool_name: str, arguments: str, output_messages: list[OpenAIResponseOutput]
+    ) -> AsyncIterator[OpenAIResponseObjectStream]:
+        mcp_server = self.mcp_tool_to_server[tool_name]
+        mcp_approval_request = OpenAIResponseMCPApprovalRequest(
+            arguments=arguments,
+            id=f"approval_{uuid.uuid4()}",
+            name=tool_name,
+            server_label=mcp_server.server_label,
+        )
+        output_messages.append(mcp_approval_request)
+
+        self.sequence_number += 1
+        yield OpenAIResponseObjectStreamResponseOutputItemAdded(
+            response_id=self.response_id,
+            item=mcp_approval_request,
+            output_index=len(output_messages) - 1,
+            sequence_number=self.sequence_number,
+        )
+
+        self.sequence_number += 1
+        yield OpenAIResponseObjectStreamResponseOutputItemDone(
+            response_id=self.response_id,
+            item=mcp_approval_request,
+            output_index=len(output_messages) - 1,
+            sequence_number=self.sequence_number,
+        )
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 89086c262..5dcc9099d 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -11,6 +11,8 @@ from pydantic import BaseModel
 from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInputTool,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
     OpenAIResponseObjectStream,
     OpenAIResponseOutput,
 )
@@ -58,3 +60,24 @@ class ChatCompletionContext(BaseModel):
     chat_tools: list[ChatCompletionToolParam] | None = None
     temperature: float | None
     response_format: OpenAIResponseFormatParam
+
+    approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
+    approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}
+
+    def extract_approvals(self, inputs: str | list) -> None:
+        if not isinstance(inputs, str):
+            self.approval_requests = [item for item in inputs if item.type == "mcp_approval_request"]
+            self.approval_responses = {
+                item.approval_request_id: item for item in inputs if item.type == "mcp_approval_response"
+            }
+
+    def approval_response(self, tool_name: str, arguments: str) -> OpenAIResponseMCPApprovalResponse | None:
+        request = self._approval_request(tool_name, arguments)
+        if request and request.id in self.approval_responses:
+            return self.approval_responses[request.id]
+        return None
+
+    def _approval_request(self, tool_name: str, arguments: str) -> OpenAIResponseMCPApprovalRequest | None:
+        for request in self.approval_requests:
+            if request.name == tool_name and request.arguments == arguments:
+                return request
+        return None
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 7aaeb4cd5..310a88298 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -13,6 +13,8 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
+    OpenAIResponseMCPApprovalRequest,
+    OpenAIResponseMCPApprovalResponse,
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
@@ -149,6 +151,11 @@ async def convert_response_input_to_chat_messages(
         elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
             # the tool list will be handled separately
             pass
+        elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
+            input_item, OpenAIResponseMCPApprovalResponse
+        ):
+            # these are handled by the responses impl itself and not passed through to chat completions
+            pass
         else:
             content = await convert_response_content_to_chat_content(input_item.content)
             message_type = await get_message_type_by_role(input_item.role)
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index c5c9e6fc1..f23734892 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -246,6 +246,82 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
         assert "boiling point" in text_content.lower()
 
 
+@pytest.mark.parametrize("case", mcp_tool_test_cases)
+@pytest.mark.parametrize("approve", [True, False])
+def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
+    with make_mcp_server() as mcp_server_info:
+        tools = setup_mcp_tools(case.tools, mcp_server_info)
+        for tool in tools:
+            tool["require_approval"] = "always"
+
+        response = compat_client.responses.create(
+            model=text_model_id,
+            input=case.input,
+            tools=tools,
+            stream=False,
+        )
+
+        assert len(response.output) >= 2
+        list_tools = response.output[0]
+        assert list_tools.type == "mcp_list_tools"
+        assert list_tools.server_label == "localmcp"
+        assert len(list_tools.tools) == 2
+        assert {t.name for t in list_tools.tools} == {
+            "get_boiling_point",
+            "greet_everyone",
+        }
+
+        
approval_request = response.output[1] + assert approval_request.type == "mcp_approval_request" + assert approval_request.name == "get_boiling_point" + assert json.loads(approval_request.arguments) == { + "liquid_name": "myawesomeliquid", + "celsius": True, + } + + # send approval response + response = compat_client.responses.create( + previous_response_id=response.id, + model=text_model_id, + input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}], + tools=tools, + stream=False, + ) + + if approve: + assert len(response.output) >= 3 + list_tools = response.output[0] + assert list_tools.type == "mcp_list_tools" + assert list_tools.server_label == "localmcp" + assert len(list_tools.tools) == 2 + assert {t.name for t in list_tools.tools} == { + "get_boiling_point", + "greet_everyone", + } + + call = response.output[1] + assert call.type == "mcp_call" + assert call.name == "get_boiling_point" + assert json.loads(call.arguments) == { + "liquid_name": "myawesomeliquid", + "celsius": True, + } + assert call.error is None + assert "-100" in call.output + + # sometimes the model will call the tool again, so we need to get the last message + message = response.output[-1] + text_content = message.content[0].text + assert "boiling point" in text_content.lower() + else: + assert len(response.output) >= 1 + for output in response.output: + assert output.type != "mcp_call" + + @pytest.mark.parametrize("case", custom_tool_test_cases) def test_response_non_streaming_custom_tool(compat_client, text_model_id, case): response = compat_client.responses.create(
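The integration test above doubles as a walkthrough of the approval flow: a tool registered with require_approval="always" yields an mcp_approval_request instead of an mcp_call, and a follow-up turn carrying an mcp_approval_response unblocks (or suppresses) the actual tool invocation. A condensed sketch of the same round-trip from a plain OpenAI-compatible client follows; the model id, base URL, and MCP server endpoint are illustrative assumptions, not values from this patch.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

tools = [
    {
        "type": "mcp",
        "server_label": "localmcp",
        "server_url": "http://localhost:8000/sse",  # assumed MCP server endpoint
        "require_approval": "always",
    }
]

# First turn: the model wants a tool, so the output carries an
# mcp_approval_request rather than an mcp_call.
resp = client.responses.create(
    model="my-model",  # assumed model id
    input="What is the boiling point of water?",
    tools=tools,
)
request = next(o for o in resp.output if o.type == "mcp_approval_request")

# Second turn: answer the approval request; with approve=True the server
# executes the tool and the final output includes the mcp_call result.
final = client.responses.create(
    model="my-model",
    previous_response_id=resp.id,
    input=[
        {
            "type": "mcp_approval_response",
            "approval_request_id": request.id,
            "approve": True,
        }
    ],
    tools=tools,
)
print([o.type for o in final.output])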