feat: enable MCP execution in Responses impl (#2240)

## Test Plan

```
pytest -s -v 'tests/verifications/openai_api/test_responses.py' \
  --provider=stack:together --model meta-llama/Llama-4-Scout-17B-16E-Instruct
```
2025-12-05 18:27:22 +00:00 · 2025-05-24 14:20:42 -07:00 · 2025-05-24 14:20:42 -07:00 · 3faf1e4a79
commit 3faf1e4a79
parent 66f09f24ed
15 changed files with 865 additions and 382 deletions
--- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@ -31,6 +31,18 @@ test_response_web_search:
        search_context_size: "low"
      output: "128"

+test_response_mcp_tool:
+  test_name: test_response_mcp_tool
+  test_params:
+    case:
+    - case_id: "boiling_point_tool"
+      input: "What is the boiling point of polyjuice?"
+      tools:
+      - type: mcp
+        server_label: "localmcp"
+        server_url: "<FILLED_BY_TEST_RUNNER>"
+      output: "Hello, world!"
+
 test_response_custom_tool:
  test_name: test_response_custom_tool
  test_params:
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@ -4,9 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import json

+import httpx
 import pytest

+from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.distribution.datatypes import AuthenticationRequiredError
+from tests.common.mcp import make_mcp_server
 from tests.verifications.openai_api.fixtures.fixtures import (
    case_id_generator,
    get_base_test_name,
@ -124,6 +129,79 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
    assert case["output"].lower() in response.output_text.lower().strip()


+@pytest.mark.parametrize(
+    "case",
+    responses_test_cases["test_response_mcp_tool"]["test_params"]["case"],
+    ids=case_id_generator,
+)
+def test_response_non_streaming_mcp_tool(request, openai_client, model, provider, verification_config, case):
+    test_name_base = get_base_test_name(request)
+    if should_skip_test(verification_config, provider, model, test_name_base):
+        pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
+
+    with make_mcp_server() as mcp_server_info:
+        tools = case["tools"]
+        for tool in tools:
+            if tool["type"] == "mcp":
+                tool["server_url"] = mcp_server_info["server_url"]
+
+        response = openai_client.responses.create(
+            model=model,
+            input=case["input"],
+            tools=tools,
+            stream=False,
+        )
+        assert len(response.output) >= 3
+        list_tools = response.output[0]
+        assert list_tools.type == "mcp_list_tools"
+        assert list_tools.server_label == "localmcp"
+        assert len(list_tools.tools) == 2
+        assert {t["name"] for t in list_tools.tools} == {"get_boiling_point", "greet_everyone"}
+
+        call = response.output[1]
+        assert call.type == "mcp_call"
+        assert call.name == "get_boiling_point"
+        assert json.loads(call.arguments) == {"liquid_name": "polyjuice", "celcius": True}
+        assert call.error is None
+        assert "-100" in call.output
+
+        message = response.output[2]
+        text_content = message.content[0].text
+        assert "boiling point" in text_content.lower()
+
+    with make_mcp_server(required_auth_token="test-token") as mcp_server_info:
+        tools = case["tools"]
+        for tool in tools:
+            if tool["type"] == "mcp":
+                tool["server_url"] = mcp_server_info["server_url"]
+
+        exc_type = (
+            AuthenticationRequiredError
+            if isinstance(openai_client, LlamaStackAsLibraryClient)
+            else httpx.HTTPStatusError
+        )
+        with pytest.raises(exc_type):
+            openai_client.responses.create(
+                model=model,
+                input=case["input"],
+                tools=tools,
+                stream=False,
+            )
+
+        for tool in tools:
+            if tool["type"] == "mcp":
+                tool["server_url"] = mcp_server_info["server_url"]
+                tool["headers"] = {"Authorization": "Bearer test-token"}
+
+        response = openai_client.responses.create(
+            model=model,
+            input=case["input"],
+            tools=tools,
+            stream=False,
+        )
+        assert len(response.output) >= 3
+
+
@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_custom_tool"]["test_params"]["case"],