From 9bb0e16908ceb69b6612d9c6392619ba4bd11070 Mon Sep 17 00:00:00 2001 From: Anastas Stoyanovsky Date: Fri, 14 Nov 2025 14:42:14 -0500 Subject: [PATCH] Remove unreliable parallel tool calls tests Removed tests for parallel tool calls due to reliability issues. --- .../agents/test_openai_responses.py | 93 ------------------- 1 file changed, 93 deletions(-) diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index d98880c4a..057cee774 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -682,96 +682,3 @@ def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, te # Verify we have a valid max_tool_calls field assert response_3.max_tool_calls == max_tool_calls[1] - - -@pytest.mark.skip(reason="Tool calling is not reliable.") -def test_parallel_tool_calls_true(openai_client, client_with_models, text_model_id): - """Test handling of max_tool_calls with function tools in responses.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("OpenAI responses are not supported when testing with library client yet.") - - client = openai_client - parallel_tool_calls = True - - tools = [ - { - "type": "function", - "name": "get_weather", - "description": "Get weather information for a specified location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city name (e.g., 'New York', 'London')", - }, - }, - }, - } - ] - - # First create a response that triggers function tools - response = client.responses.create( - model=text_model_id, - input="Get the weather in New York and in Paris", - tools=tools, - stream=False, - parallel_tool_calls=parallel_tool_calls, - ) - - # Verify we got two function calls and that the max_tool_calls do not affect function tools - assert len(response.output) == 2 - assert response.output[0].type == "function_call" - assert response.output[0].name == "get_weather" - assert response.output[0].status == "completed" - assert response.output[1].type == "function_call" - assert response.output[1].name == "get_weather" - assert response.output[0].status == "completed" - - # Verify we have a valid max_tool_calls field - assert response.parallel_tool_calls == parallel_tool_calls - - -@pytest.mark.skip(reason="Tool calling is not reliable.") -def test_parallel_tool_calls_false(openai_client, client_with_models, text_model_id): - """Test handling of max_tool_calls with function tools in responses.""" - if isinstance(client_with_models, LlamaStackAsLibraryClient): - pytest.skip("OpenAI responses are not supported when testing with library client yet.") - - client = openai_client - parallel_tool_calls = False - - tools = [ - { - "type": "function", - "name": "get_weather", - "description": "Get weather information for a specified location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city name (e.g., 'New York', 'London')", - }, - }, - }, - } - ] - - # First create a response that triggers function tools - response = client.responses.create( - model=text_model_id, - input="Get the weather in New York and in Paris", - tools=tools, - stream=False, - parallel_tool_calls=parallel_tool_calls, - ) - - # Verify we got two function calls and that the max_tool_calls do not affect function tools - assert len(response.output) == 1 - assert response.output[0].type == "function_call" - assert response.output[0].name == "get_weather" - assert response.output[0].status == "completed" - - # Verify we have a valid max_tool_calls field - assert response.parallel_tool_calls == parallel_tool_calls