diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html
index 97671f084..20f05a110 100644
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
@@ -9028,6 +9028,12 @@
{
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
},
+ {
+ "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
+ },
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
@@ -9445,6 +9451,68 @@
"title": "OpenAIResponseInputToolWebSearch",
"description": "Web search tool configuration for OpenAI response inputs."
},
+ "OpenAIResponseMCPApprovalRequest": {
+ "type": "object",
+ "properties": {
+ "arguments": {
+ "type": "string"
+ },
+ "id": {
+ "type": "string"
+ },
+ "name": {
+ "type": "string"
+ },
+ "server_label": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "mcp_approval_request",
+ "default": "mcp_approval_request"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "arguments",
+ "id",
+ "name",
+ "server_label",
+ "type"
+ ],
+ "title": "OpenAIResponseMCPApprovalRequest",
+ "description": "A request for human approval of a tool invocation."
+ },
+ "OpenAIResponseMCPApprovalResponse": {
+ "type": "object",
+ "properties": {
+ "approval_request_id": {
+ "type": "string"
+ },
+ "approve": {
+ "type": "boolean"
+ },
+ "type": {
+ "type": "string",
+ "const": "mcp_approval_response",
+ "default": "mcp_approval_response"
+ },
+ "id": {
+ "type": "string"
+ },
+ "reason": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "approval_request_id",
+ "approve",
+ "type"
+ ],
+ "title": "OpenAIResponseMCPApprovalResponse",
+ "description": "A response to an MCP approval request."
+ },
"OpenAIResponseMessage": {
"type": "object",
"properties": {
@@ -9949,6 +10017,9 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
],
"discriminator": {
@@ -9959,7 +10030,8 @@
"file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
- "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools",
+ "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
}
},
@@ -10658,6 +10730,9 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
],
"discriminator": {
@@ -10668,7 +10743,8 @@
"file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
- "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools",
+ "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
},
"description": "The output item that was added (message, tool call, etc.)"
@@ -10725,6 +10801,9 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ },
+ {
+ "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
],
"discriminator": {
@@ -10735,7 +10814,8 @@
"file_search_call": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
- "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
+ "mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools",
+ "mcp_approval_request": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
}
},
"description": "The completed output item (message, tool call, etc.)"
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 33a7e66d8..bf8357333 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -6541,6 +6541,8 @@ components:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
+ - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+ - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseMessage'
"OpenAIResponseInputFunctionToolCallOutput":
type: object
@@ -6835,6 +6837,53 @@ components:
title: OpenAIResponseInputToolWebSearch
description: >-
Web search tool configuration for OpenAI response inputs.
+ OpenAIResponseMCPApprovalRequest:
+ type: object
+ properties:
+ arguments:
+ type: string
+ id:
+ type: string
+ name:
+ type: string
+ server_label:
+ type: string
+ type:
+ type: string
+ const: mcp_approval_request
+ default: mcp_approval_request
+ additionalProperties: false
+ required:
+ - arguments
+ - id
+ - name
+ - server_label
+ - type
+ title: OpenAIResponseMCPApprovalRequest
+ description: >-
+ A request for human approval of a tool invocation.
+ OpenAIResponseMCPApprovalResponse:
+ type: object
+ properties:
+ approval_request_id:
+ type: string
+ approve:
+ type: boolean
+ type:
+ type: string
+ const: mcp_approval_response
+ default: mcp_approval_response
+ id:
+ type: string
+ reason:
+ type: string
+ additionalProperties: false
+ required:
+ - approval_request_id
+ - approve
+ - type
+ title: OpenAIResponseMCPApprovalResponse
+ description: A response to an MCP approval request.
OpenAIResponseMessage:
type: object
properties:
@@ -7227,6 +7276,7 @@ components:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
discriminator:
propertyName: type
mapping:
@@ -7236,6 +7286,7 @@ components:
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponseOutputMessageMCPCall:
type: object
properties:
@@ -7785,6 +7836,7 @@ components:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
discriminator:
propertyName: type
mapping:
@@ -7794,6 +7846,7 @@ components:
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
description: >-
The output item that was added (message, tool call, etc.)
output_index:
@@ -7836,6 +7889,7 @@ components:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
discriminator:
propertyName: type
mapping:
@@ -7845,6 +7899,7 @@ components:
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
+ mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
description: >-
The completed output item (message, tool call, etc.)
output_index:
diff --git a/llama_stack/apis/agents/openai_responses.py b/llama_stack/apis/agents/openai_responses.py
index b26b11f4f..190e35fd0 100644
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@@ -276,13 +276,40 @@ class OpenAIResponseOutputMessageMCPListTools(BaseModel):
tools: list[MCPListToolsTool]
+@json_schema_type
+class OpenAIResponseMCPApprovalRequest(BaseModel):
+ """
+ A request for human approval of a tool invocation.
+ """
+
+ arguments: str
+ id: str
+ name: str
+ server_label: str
+ type: Literal["mcp_approval_request"] = "mcp_approval_request"
+
+
+@json_schema_type
+class OpenAIResponseMCPApprovalResponse(BaseModel):
+ """
+ A response to an MCP approval request.
+ """
+
+ approval_request_id: str
+ approve: bool
+ type: Literal["mcp_approval_response"] = "mcp_approval_response"
+ id: str | None = None
+ reason: str | None = None
+
+
OpenAIResponseOutput = Annotated[
OpenAIResponseMessage
| OpenAIResponseOutputMessageWebSearchToolCall
| OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseOutputMessageMCPCall
- | OpenAIResponseOutputMessageMCPListTools,
+ | OpenAIResponseOutputMessageMCPListTools
+ | OpenAIResponseMCPApprovalRequest,
Field(discriminator="type"),
]
register_schema(OpenAIResponseOutput, name="OpenAIResponseOutput")
@@ -723,6 +750,8 @@ OpenAIResponseInput = Annotated[
| OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseInputFunctionToolCallOutput
+ | OpenAIResponseMCPApprovalRequest
+ | OpenAIResponseMCPApprovalResponse
|
# Fallback to the generic message type as a last resort
OpenAIResponseMessage,
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index c632e61aa..c27dc8467 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -237,6 +237,7 @@ class OpenAIResponsesImpl:
response_tools=tools,
temperature=temperature,
response_format=response_format,
+ inputs=input,
)
# Create orchestrator and delegate streaming logic
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 2f45ad2a3..1df37d1e6 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -10,10 +10,12 @@ from typing import Any
from llama_stack.apis.agents.openai_responses import (
AllowedToolsFilter,
+ ApprovalFilter,
MCPListToolsTool,
OpenAIResponseContentPartOutputText,
OpenAIResponseInputTool,
OpenAIResponseInputToolMCP,
+ OpenAIResponseMCPApprovalRequest,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponseObjectStreamResponseCompleted,
@@ -147,10 +149,17 @@ class StreamingResponseOrchestrator:
raise ValueError("Streaming chunk processor failed to return completion data")
current_response = self._build_chat_completion(completion_result_data)
- function_tool_calls, non_function_tool_calls, next_turn_messages = self._separate_tool_calls(
+ function_tool_calls, non_function_tool_calls, approvals, next_turn_messages = self._separate_tool_calls(
current_response, messages
)
+            # emit an approval request item for each tool call that still needs approval
+ for tool_call in approvals:
+ async for evt in self._add_mcp_approval_request(
+ tool_call.function.name, tool_call.function.arguments, output_messages
+ ):
+ yield evt
+
# Handle choices with no tool calls
for choice in current_response.choices:
if not (choice.message.tool_calls and self.ctx.response_tools):
@@ -194,10 +203,11 @@ class StreamingResponseOrchestrator:
# Emit response.completed
yield OpenAIResponseObjectStreamResponseCompleted(response=final_response)
-    def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list]:
-        """Separate tool calls into function and non-function categories."""
+    def _separate_tool_calls(self, current_response, messages) -> tuple[list, list, list, list]:
+        """Separate tool calls into function, non-function, and approval-pending categories."""
function_tool_calls = []
non_function_tool_calls = []
+ approvals = []
next_turn_messages = messages.copy()
for choice in current_response.choices:
@@ -208,9 +218,23 @@ class StreamingResponseOrchestrator:
if is_function_tool_call(tool_call, self.ctx.response_tools):
function_tool_calls.append(tool_call)
else:
- non_function_tool_calls.append(tool_call)
+ if self._approval_required(tool_call.function.name):
+ approval_response = self.ctx.approval_response(
+ tool_call.function.name, tool_call.function.arguments
+ )
+ if approval_response:
+ if approval_response.approve:
+ logger.info(f"Approval granted for {tool_call.id} on {tool_call.function.name}")
+ non_function_tool_calls.append(tool_call)
+ else:
+ logger.info(f"Approval denied for {tool_call.id} on {tool_call.function.name}")
+ else:
+ logger.info(f"Requesting approval for {tool_call.id} on {tool_call.function.name}")
+ approvals.append(tool_call)
+ else:
+ non_function_tool_calls.append(tool_call)
- return function_tool_calls, non_function_tool_calls, next_turn_messages
+ return function_tool_calls, non_function_tool_calls, approvals, next_turn_messages
async def _process_streaming_chunks(
self, completion_result, output_messages: list[OpenAIResponseOutput]
@@ -646,3 +670,46 @@ class StreamingResponseOrchestrator:
# TODO: Emit mcp_list_tools.failed event if needed
logger.exception(f"Failed to list MCP tools from {mcp_tool.server_url}: {e}")
raise
+
+    def _approval_required(self, tool_name: str) -> bool:
+        if tool_name not in self.mcp_tool_to_server:
+            return False
+        mcp_server = self.mcp_tool_to_server[tool_name]
+        if mcp_server.require_approval == "always":
+            return True
+        if mcp_server.require_approval == "never":
+            return False
+        if isinstance(mcp_server.require_approval, ApprovalFilter):
+            # an ApprovalFilter lists tools that always or never require approval
+            approval_filter = mcp_server.require_approval
+            if approval_filter.always and tool_name in approval_filter.always:
+                return True
+            if approval_filter.never and tool_name in approval_filter.never:
+                return False
+        # default to requiring approval when the tool is not explicitly exempted
+        return True
+
+ async def _add_mcp_approval_request(
+ self, tool_name: str, arguments: str, output_messages: list[OpenAIResponseOutput]
+ ) -> AsyncIterator[OpenAIResponseObjectStream]:
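+        """Record an approval request as an output item and emit output_item added/done events for it."""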
+ mcp_server = self.mcp_tool_to_server[tool_name]
+ mcp_approval_request = OpenAIResponseMCPApprovalRequest(
+ arguments=arguments,
+ id=f"approval_{uuid.uuid4()}",
+ name=tool_name,
+ server_label=mcp_server.server_label,
+ )
+ output_messages.append(mcp_approval_request)
+
+ self.sequence_number += 1
+ yield OpenAIResponseObjectStreamResponseOutputItemAdded(
+ response_id=self.response_id,
+ item=mcp_approval_request,
+ output_index=len(output_messages) - 1,
+ sequence_number=self.sequence_number,
+ )
+
+ self.sequence_number += 1
+ yield OpenAIResponseObjectStreamResponseOutputItemDone(
+ response_id=self.response_id,
+ item=mcp_approval_request,
+ output_index=len(output_messages) - 1,
+ sequence_number=self.sequence_number,
+ )
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 89086c262..d3b5a16bd 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -10,7 +10,10 @@ from openai.types.chat import ChatCompletionToolParam
from pydantic import BaseModel
from llama_stack.apis.agents.openai_responses import (
+ OpenAIResponseInput,
OpenAIResponseInputTool,
+ OpenAIResponseMCPApprovalRequest,
+ OpenAIResponseMCPApprovalResponse,
OpenAIResponseObjectStream,
OpenAIResponseOutput,
)
@@ -58,3 +61,37 @@ class ChatCompletionContext(BaseModel):
chat_tools: list[ChatCompletionToolParam] | None = None
temperature: float | None
response_format: OpenAIResponseFormatParam
+ approval_requests: list[OpenAIResponseMCPApprovalRequest] = []
+ approval_responses: dict[str, OpenAIResponseMCPApprovalResponse] = {}
+
+ def __init__(
+ self,
+ model: str,
+ messages: list[OpenAIMessageParam],
+ response_tools: list[OpenAIResponseInputTool] | None,
+ temperature: float | None,
+ response_format: OpenAIResponseFormatParam,
+ inputs: list[OpenAIResponseInput] | str,
+ ):
+ super().__init__(
+ model=model,
+ messages=messages,
+ response_tools=response_tools,
+ temperature=temperature,
+ response_format=response_format,
+ )
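+        # conversation items from prior turns may carry approval requests and the user's
+        # responses to them; extract both so approval decisions can be matched to tool calls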
+        if not isinstance(inputs, str):
+            self.approval_requests = [item for item in inputs if item.type == "mcp_approval_request"]
+            self.approval_responses = {
+                item.approval_request_id: item for item in inputs if item.type == "mcp_approval_response"
+            }
+
+ def approval_response(self, tool_name: str, arguments: str) -> OpenAIResponseMCPApprovalResponse | None:
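+        """Return the stored approval response, if any, for the pending request matching this tool call."""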
+ request = self._approval_request(tool_name, arguments)
+ return self.approval_responses.get(request.id, None) if request else None
+
+ def _approval_request(self, tool_name: str, arguments: str) -> OpenAIResponseMCPApprovalRequest | None:
+ for request in self.approval_requests:
+ if request.name == tool_name and request.arguments == arguments:
+ return request
+ return None
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 7aaeb4cd5..310a88298 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -13,6 +13,8 @@ from llama_stack.apis.agents.openai_responses import (
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool,
+ OpenAIResponseMCPApprovalRequest,
+ OpenAIResponseMCPApprovalResponse,
OpenAIResponseMessage,
OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText,
@@ -149,6 +151,11 @@ async def convert_response_input_to_chat_messages(
elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
# the tool list will be handled separately
pass
+            elif isinstance(input_item, (OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse)):
+                # approval requests and responses are handled by the responses impl itself and are not
+                # passed through to chat completions
+                pass
else:
content = await convert_response_content_to_chat_content(input_item.content)
message_type = await get_message_type_by_role(input_item.role)
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index c5c9e6fc1..f23734892 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -246,6 +246,82 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
assert "boiling point" in text_content.lower()
+@pytest.mark.parametrize("case", mcp_tool_test_cases)
+@pytest.mark.parametrize("approve", [True, False])
+def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve):
+ if not isinstance(compat_client, LlamaStackAsLibraryClient):
+ pytest.skip("in-process MCP server is only supported in library client")
+
+ with make_mcp_server() as mcp_server_info:
+ tools = setup_mcp_tools(case.tools, mcp_server_info)
+ for tool in tools:
+ tool["require_approval"] = "always"
+
+ response = compat_client.responses.create(
+ model=text_model_id,
+ input=case.input,
+ tools=tools,
+ stream=False,
+ )
+
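+        # without an approval, the first response should contain an approval request rather than an mcp_call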
+ assert len(response.output) >= 2
+ list_tools = response.output[0]
+ assert list_tools.type == "mcp_list_tools"
+ assert list_tools.server_label == "localmcp"
+ assert len(list_tools.tools) == 2
+ assert {t.name for t in list_tools.tools} == {
+ "get_boiling_point",
+ "greet_everyone",
+ }
+
+ approval_request = response.output[1]
+ assert approval_request.type == "mcp_approval_request"
+ assert approval_request.name == "get_boiling_point"
+ assert json.loads(approval_request.arguments) == {
+ "liquid_name": "myawesomeliquid",
+ "celsius": True,
+ }
+
+ # send approval response
+ response = compat_client.responses.create(
+ previous_response_id=response.id,
+ model=text_model_id,
+ input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}],
+ tools=tools,
+ stream=False,
+ )
+
+ if approve:
+ assert len(response.output) >= 3
+ list_tools = response.output[0]
+ assert list_tools.type == "mcp_list_tools"
+ assert list_tools.server_label == "localmcp"
+ assert len(list_tools.tools) == 2
+ assert {t.name for t in list_tools.tools} == {
+ "get_boiling_point",
+ "greet_everyone",
+ }
+
+ call = response.output[1]
+ assert call.type == "mcp_call"
+ assert call.name == "get_boiling_point"
+ assert json.loads(call.arguments) == {
+ "liquid_name": "myawesomeliquid",
+ "celsius": True,
+ }
+ assert call.error is None
+ assert "-100" in call.output
+
+ # sometimes the model will call the tool again, so we need to get the last message
+ message = response.output[-1]
+ text_content = message.content[0].text
+ assert "boiling point" in text_content.lower()
+ else:
+ assert len(response.output) >= 1
+ for output in response.output:
+ assert output.type != "mcp_call"
+
+
@pytest.mark.parametrize("case", custom_tool_test_cases)
def test_response_non_streaming_custom_tool(compat_client, text_model_id, case):
response = compat_client.responses.create(