From 9bb0e16908ceb69b6612d9c6392619ba4bd11070 Mon Sep 17 00:00:00 2001
From: Anastas Stoyanovsky <astoyano@redhat.com>
Date: Fri, 14 Nov 2025 14:42:14 -0500
Subject: [PATCH] Remove unreliable parallel tool calls tests

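Remove test_parallel_tool_calls_true and test_parallel_tool_calls_false
from the OpenAI responses integration tests. Both were already disabled
with @pytest.mark.skip(reason="Tool calling is not reliable."), so they
never ran; they are removed due to those reliability issues.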
---
 .../agents/test_openai_responses.py           | 93 -------------------
 1 file changed, 93 deletions(-)

diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py
index d98880c4a..057cee774 100644
--- a/tests/integration/agents/test_openai_responses.py
+++ b/tests/integration/agents/test_openai_responses.py
@@ -682,96 +682,3 @@ def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, te
 
     # Verify we have a valid max_tool_calls field
     assert response_3.max_tool_calls == max_tool_calls[1]
-
-
-@pytest.mark.skip(reason="Tool calling is not reliable.")
-def test_parallel_tool_calls_true(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with function tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-    parallel_tool_calls = True
-
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get weather information for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        }
-    ]
-
-    # First create a response that triggers function tools
-    response = client.responses.create(
-        model=text_model_id,
-        input="Get the weather in New York and in Paris",
-        tools=tools,
-        stream=False,
-        parallel_tool_calls=parallel_tool_calls,
-    )
-
-    # Verify we got two function calls and that the max_tool_calls do not affect function tools
-    assert len(response.output) == 2
-    assert response.output[0].type == "function_call"
-    assert response.output[0].name == "get_weather"
-    assert response.output[0].status == "completed"
-    assert response.output[1].type == "function_call"
-    assert response.output[1].name == "get_weather"
-    assert response.output[0].status == "completed"
-
-    # Verify we have a valid max_tool_calls field
-    assert response.parallel_tool_calls == parallel_tool_calls
-
-
-@pytest.mark.skip(reason="Tool calling is not reliable.")
-def test_parallel_tool_calls_false(openai_client, client_with_models, text_model_id):
-    """Test handling of max_tool_calls with function tools in responses."""
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
-        pytest.skip("OpenAI responses are not supported when testing with library client yet.")
-
-    client = openai_client
-    parallel_tool_calls = False
-
-    tools = [
-        {
-            "type": "function",
-            "name": "get_weather",
-            "description": "Get weather information for a specified location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name (e.g., 'New York', 'London')",
-                    },
-                },
-            },
-        }
-    ]
-
-    # First create a response that triggers function tools
-    response = client.responses.create(
-        model=text_model_id,
-        input="Get the weather in New York and in Paris",
-        tools=tools,
-        stream=False,
-        parallel_tool_calls=parallel_tool_calls,
-    )
-
-    # Verify we got two function calls and that the max_tool_calls do not affect function tools
-    assert len(response.output) == 1
-    assert response.output[0].type == "function_call"
-    assert response.output[0].name == "get_weather"
-    assert response.output[0].status == "completed"
-
-    # Verify we have a valid max_tool_calls field
-    assert response.parallel_tool_calls == parallel_tool_calls