feat(tests): enable MCP tests in server mode (#4146)

We would like to run all OpenAI compatibility tests using only the
openai-client library. This is most friendly for contributors since they
can run tests without needing to update the client-sdks (which is
getting easier but still a long pole.)

This is the first step in enabling that -- not using the "library client" for
any of the Responses tests. This seems like a reasonable trade-off since
the usage of an embeddable library client for Responses (or any
OpenAI-compatible) behavior seems to be not very common. To do this, we
needed to enable MCP tests (which only worked in library client mode)
for server mode.
This commit is contained in:
Ashwin Bharambe 2025-11-13 07:23:23 -08:00 committed by GitHub
parent 9eb81439d2
commit 1e81056a22
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 13388 additions and 127 deletions

View file

@ -13,8 +13,8 @@ from .streaming_assertions import StreamingValidator
@pytest.mark.parametrize("case", basic_test_cases)
def test_response_non_streaming_basic(compat_client, text_model_id, case):
response = compat_client.responses.create(
def test_response_non_streaming_basic(responses_client, text_model_id, case):
response = responses_client.responses.create(
model=text_model_id,
input=case.input,
stream=False,
@ -31,10 +31,10 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case):
"Total tokens should equal input + output tokens"
)
retrieved_response = compat_client.responses.retrieve(response_id=response.id)
retrieved_response = responses_client.responses.retrieve(response_id=response.id)
assert retrieved_response.output_text == response.output_text
next_response = compat_client.responses.create(
next_response = responses_client.responses.create(
model=text_model_id,
input="Repeat your previous response in all caps.",
previous_response_id=response.id,
@ -44,8 +44,8 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case):
@pytest.mark.parametrize("case", basic_test_cases)
def test_response_streaming_basic(compat_client, text_model_id, case):
response = compat_client.responses.create(
def test_response_streaming_basic(responses_client, text_model_id, case):
response = responses_client.responses.create(
model=text_model_id,
input=case.input,
stream=True,
@ -98,15 +98,15 @@ def test_response_streaming_basic(compat_client, text_model_id, case):
validator.assert_response_consistency()
# Verify stored response matches streamed response
retrieved_response = compat_client.responses.retrieve(response_id=response_id)
retrieved_response = responses_client.responses.retrieve(response_id=response_id)
final_event = events[-1]
assert retrieved_response.output_text == final_event.response.output_text
@pytest.mark.parametrize("case", basic_test_cases)
def test_response_streaming_incremental_content(compat_client, text_model_id, case):
def test_response_streaming_incremental_content(responses_client, text_model_id, case):
"""Test that streaming actually delivers content incrementally, not just at the end."""
response = compat_client.responses.create(
response = responses_client.responses.create(
model=text_model_id,
input=case.input,
stream=True,
@ -170,10 +170,10 @@ def test_response_streaming_incremental_content(compat_client, text_model_id, ca
@pytest.mark.parametrize("case", multi_turn_test_cases)
def test_response_non_streaming_multi_turn(compat_client, text_model_id, case):
def test_response_non_streaming_multi_turn(responses_client, text_model_id, case):
previous_response_id = None
for turn_input, turn_expected in case.turns:
response = compat_client.responses.create(
response = responses_client.responses.create(
model=text_model_id,
input=turn_input,
previous_response_id=previous_response_id,
@ -184,8 +184,8 @@ def test_response_non_streaming_multi_turn(compat_client, text_model_id, case):
@pytest.mark.parametrize("case", image_test_cases)
def test_response_non_streaming_image(compat_client, text_model_id, case):
response = compat_client.responses.create(
def test_response_non_streaming_image(responses_client, text_model_id, case):
response = responses_client.responses.create(
model=text_model_id,
input=case.input,
stream=False,
@ -195,10 +195,10 @@ def test_response_non_streaming_image(compat_client, text_model_id, case):
@pytest.mark.parametrize("case", multi_turn_image_test_cases)
def test_response_non_streaming_multi_turn_image(compat_client, text_model_id, case):
def test_response_non_streaming_multi_turn_image(responses_client, text_model_id, case):
previous_response_id = None
for turn_input, turn_expected in case.turns:
response = compat_client.responses.create(
response = responses_client.responses.create(
model=text_model_id,
input=turn_input,
previous_response_id=previous_response_id,