From dbe41d9510014227f8518551f444700a65eaf449 Mon Sep 17 00:00:00 2001
From: Omar Abdelwahab
Date: Thu, 6 Nov 2025 11:08:27 -0800
Subject: [PATCH] Updated a single test case to not include the authorization field in the header

---
 .../responses/test_tool_responses.py | 153 +++++++++++++-----
 1 file changed, 115 insertions(+), 38 deletions(-)

diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index ce0e65b4b..4501961f3 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -24,7 +24,12 @@ from .fixtures.test_cases import (
     multi_turn_tool_execution_test_cases,
     web_search_test_cases,
 )
-from .helpers import new_vector_store, setup_mcp_tools, upload_file, wait_for_file_attachment
+from .helpers import (
+    new_vector_store,
+    setup_mcp_tools,
+    upload_file,
+    wait_for_file_attachment,
+)
 from .streaming_assertions import StreamingValidator
 
 
@@ -48,12 +53,19 @@ def test_response_non_streaming_web_search(compat_client, text_model_id, case):
 
 @pytest.mark.parametrize("case", file_search_test_cases)
 def test_response_non_streaming_file_search(
-    compat_client, text_model_id, embedding_model_id, embedding_dimension, tmp_path, case
+    compat_client,
+    text_model_id,
+    embedding_model_id,
+    embedding_dimension,
+    tmp_path,
+    case,
 ):
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
 
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(
+        compat_client, "test_vector_store", embedding_model_id, embedding_dimension
+    )
 
     if case.file_content:
         file_name = "test_response_non_streaming_file_search.txt"
@@ -110,7 +122,9 @@ def test_response_non_streaming_file_search_empty_vector_store(
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
 
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(
+        compat_client, "test_vector_store", embedding_model_id, embedding_dimension
+    )
 
     # Create the response request, which should query our vector store
     response = compat_client.responses.create(
@@ -139,7 +153,9 @@ def test_response_sequential_file_search(
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
 
-    vector_store = new_vector_store(compat_client, "test_vector_store", embedding_model_id, embedding_dimension)
+    vector_store = new_vector_store(
+        compat_client, "test_vector_store", embedding_model_id, embedding_dimension
+    )
 
     # Create a test file with content
     file_content = "The Llama 4 Maverick model has 128 experts in its mixture of experts architecture."
@@ -248,7 +264,8 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, cap
     )
     # Suppress expected auth error logs only for the failing auth attempt
     with caplog.at_level(
-        logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
+        logging.CRITICAL,
+        logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming",
     ):
         with pytest.raises(exc_type):
             compat_client.responses.create(
@@ -312,7 +329,11 @@ def test_response_sequential_mcp_tool(compat_client, text_model_id, case):
     assert "boiling point" in text_content.lower()
 
     response2 = compat_client.responses.create(
-        model=text_model_id, input=case.input, tools=tools, stream=False, previous_response_id=response.id
+        model=text_model_id,
+        input=case.input,
+        tools=tools,
+        stream=False,
+        previous_response_id=response.id,
     )
 
     assert len(response2.output) >= 1
@@ -361,7 +382,13 @@ def test_response_mcp_tool_approval(compat_client, text_model_id, case, approve)
     response = compat_client.responses.create(
         previous_response_id=response.id,
         model=text_model_id,
-        input=[{"type": "mcp_approval_response", "approval_request_id": approval_request.id, "approve": approve}],
+        input=[
+            {
+                "type": "mcp_approval_response",
+                "approval_request_id": approval_request.id,
+                "approve": approve,
+            }
+        ],
         tools=tools,
         stream=False,
     )
@@ -438,7 +465,11 @@ def test_response_function_call_ordering_1(compat_client, text_model_id, case):
         }
     )
     response = compat_client.responses.create(
-        model=text_model_id, input=inputs, tools=case.tools, stream=False, previous_response_id=response.id
+        model=text_model_id,
+        input=inputs,
+        tools=case.tools,
+        stream=False,
+        previous_response_id=response.id,
     )
 
     assert len(response.output) == 1
@@ -475,10 +506,18 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
         stream=False,
     )
     for output in response.output:
-        if output.type == "function_call" and output.status == "completed" and output.name == "get_weather":
+        if (
+            output.type == "function_call"
+            and output.status == "completed"
+            and output.name == "get_weather"
+        ):
             inputs.append(output)
     for output in response.output:
-        if output.type == "function_call" and output.status == "completed" and output.name == "get_weather":
+        if (
+            output.type == "function_call"
+            and output.status == "completed"
+            and output.name == "get_weather"
+        ):
             weather = "It is raining."
             if "Los Angeles" in output.arguments:
                 weather = "It is cloudy."
@@ -500,7 +539,9 @@ def test_response_function_call_ordering_2(compat_client, text_model_id):
 
 
 @pytest.mark.parametrize("case", multi_turn_tool_execution_test_cases)
-def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(
+    compat_client, text_model_id, case
+):
     """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")
@@ -515,12 +556,18 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_mo
     )
 
     # Verify we have MCP tool calls in the output
-    mcp_list_tools = [output for output in response.output if output.type == "mcp_list_tools"]
+    mcp_list_tools = [
+        output for output in response.output if output.type == "mcp_list_tools"
+    ]
     mcp_calls = [output for output in response.output if output.type == "mcp_call"]
-    message_outputs = [output for output in response.output if output.type == "message"]
+    message_outputs = [
+        output for output in response.output if output.type == "message"
+    ]
 
     # Should have exactly 1 MCP list tools message (at the beginning)
-    assert len(mcp_list_tools) == 1, f"Expected exactly 1 mcp_list_tools, got {len(mcp_list_tools)}"
+    assert (
+        len(mcp_list_tools) == 1
+    ), f"Expected exactly 1 mcp_list_tools, got {len(mcp_list_tools)}"
     assert mcp_list_tools[0].server_label == "localmcp"
     assert len(mcp_list_tools[0].tools) == 5  # Updated for dependency tools
     expected_tool_names = {
@@ -532,25 +579,37 @@ def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_mo
     }
     assert {t.name for t in mcp_list_tools[0].tools} == expected_tool_names
 
-    assert len(mcp_calls) >= 1, f"Expected at least 1 mcp_call, got {len(mcp_calls)}"
+    assert (
+        len(mcp_calls) >= 1
+    ), f"Expected at least 1 mcp_call, got {len(mcp_calls)}"
     for mcp_call in mcp_calls:
-        assert mcp_call.error is None, f"MCP call should not have errors, got: {mcp_call.error}"
+        assert (
+            mcp_call.error is None
+        ), f"MCP call should not have errors, got: {mcp_call.error}"
 
-    assert len(message_outputs) >= 1, f"Expected at least 1 message output, got {len(message_outputs)}"
+    assert (
+        len(message_outputs) >= 1
+    ), f"Expected at least 1 message output, got {len(message_outputs)}"
 
     final_message = message_outputs[-1]
-    assert final_message.role == "assistant", f"Final message should be from assistant, got {final_message.role}"
-    assert final_message.status == "completed", f"Final message should be completed, got {final_message.status}"
+    assert (
+        final_message.role == "assistant"
+    ), f"Final message should be from assistant, got {final_message.role}"
+    assert (
+        final_message.status == "completed"
+    ), f"Final message should be completed, got {final_message.status}"
     assert len(final_message.content) > 0, "Final message should have content"
 
     expected_output = case.expected
-    assert expected_output.lower() in response.output_text.lower(), (
-        f"Expected '{expected_output}' to appear in response: {response.output_text}"
-    )
+    assert (
+        expected_output.lower() in response.output_text.lower()
+    ), f"Expected '{expected_output}' to appear in response: {response.output_text}"
 
 
 @pytest.mark.parametrize("case", multi_turn_tool_execution_streaming_test_cases)
-def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(
+    compat_client, text_model_id, case
+):
     """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")
@@ -583,12 +642,22 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
     final_response = final_chunk.response
 
     # Verify multi-turn MCP tool execution results
-    mcp_list_tools = [output for output in final_response.output if output.type == "mcp_list_tools"]
-    mcp_calls = [output for output in final_response.output if output.type == "mcp_call"]
-    message_outputs = [output for output in final_response.output if output.type == "message"]
+    mcp_list_tools = [
+        output
+        for output in final_response.output
+        if output.type == "mcp_list_tools"
+    ]
+    mcp_calls = [
+        output for output in final_response.output if output.type == "mcp_call"
+    ]
+    message_outputs = [
+        output for output in final_response.output if output.type == "message"
+    ]
 
     # Should have exactly 1 MCP list tools message (at the beginning)
-    assert len(mcp_list_tools) == 1, f"Expected exactly 1 mcp_list_tools, got {len(mcp_list_tools)}"
+    assert (
+        len(mcp_list_tools) == 1
+    ), f"Expected exactly 1 mcp_list_tools, got {len(mcp_list_tools)}"
     assert mcp_list_tools[0].server_label == "localmcp"
     assert len(mcp_list_tools[0].tools) == 5  # Updated for dependency tools
     expected_tool_names = {
@@ -601,25 +670,33 @@ def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_
     assert {t.name for t in mcp_list_tools[0].tools} == expected_tool_names
 
     # Should have at least 1 MCP call (the model should call at least one tool)
-    assert len(mcp_calls) >= 1, f"Expected at least 1 mcp_call, got {len(mcp_calls)}"
+    assert (
+        len(mcp_calls) >= 1
+    ), f"Expected at least 1 mcp_call, got {len(mcp_calls)}"
 
     # All MCP calls should be completed (verifies our tool execution works)
     for mcp_call in mcp_calls:
-        assert mcp_call.error is None, f"MCP call should not have errors, got: {mcp_call.error}"
+        assert (
+            mcp_call.error is None
+        ), f"MCP call should not have errors, got: {mcp_call.error}"
 
     # Should have at least one final message response
-    assert len(message_outputs) >= 1, f"Expected at least 1 message output, got {len(message_outputs)}"
+    assert (
+        len(message_outputs) >= 1
+    ), f"Expected at least 1 message output, got {len(message_outputs)}"
 
     # Final message should be from assistant and completed
    final_message = message_outputs[-1]
-    assert final_message.role == "assistant", (
-        f"Final message should be from assistant, got {final_message.role}"
-    )
-    assert final_message.status == "completed", f"Final message should be completed, got {final_message.status}"
+    assert (
+        final_message.role == "assistant"
+    ), f"Final message should be from assistant, got {final_message.role}"
+    assert (
+        final_message.status == "completed"
+    ), f"Final message should be completed, got {final_message.status}"
     assert len(final_message.content) > 0, "Final message should have content"
 
     # Check that the expected output appears in the response
     expected_output = case.expected
-    assert expected_output.lower() in final_response.output_text.lower(), (
-        f"Expected '{expected_output}' to appear in response: {final_response.output_text}"
-    )
+    assert (
+        expected_output.lower() in final_response.output_text.lower()
+    ), f"Expected '{expected_output}' to appear in response: {final_response.output_text}"
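
Note (outside the diff): the subject line describes dropping the authorization
field from one test's MCP tool headers; those hunks are not shown above. A
rough, hypothetical sketch of that pattern, reusing only names visible in the
hunks (compat_client, text_model_id, case, exc_type) and placeholder values
for everything else:

    # Hypothetical sketch, not part of this patch; the server_url and the
    # commented-out header are placeholders, not values from the test file.
    tools = [
        {
            "type": "mcp",
            "server_label": "localmcp",
            "server_url": "http://localhost:8000/sse",  # placeholder
            # Omitted on purpose; without an auth header the server should
            # reject the call:
            # "headers": {"Authorization": "Bearer test-token"},
        }
    ]
    with pytest.raises(exc_type):
        compat_client.responses.create(
            model=text_model_id, input=case.input, tools=tools, stream=False
        )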