From 12dfcd11d9b5683333973dec58bebdfa860f7a3a Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Sun, 25 May 2025 19:08:36 -0700
Subject: [PATCH] fix

---
 .../agents/meta_reference/openai_responses.py      |  9 +++++++++
 .../agents/meta_reference/test_openai_responses.py | 14 +++++++++++---
 tests/verifications/openai_api/test_responses.py   |  7 +------
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index c208d7429..35c7ad8ba 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -589,6 +589,15 @@ class OpenAIResponsesImpl:
                 chat_tools.append(ChatCompletionToolParam(type="function", function=input_tool.model_dump()))
             elif input_tool.type == "web_search":
                 tool_name = "web_search"
+
+                # we need to list all the toolgroups so tools can be found. avoid MCPs because they
+                # may need authentication.
+                groups = await self.tool_groups_api.list_tool_groups()
+                for group in groups:
+                    if not group.mcp_endpoint:
+                        continue
+                    _ = await self.tool_groups_api.list_tools(group.identifier)
+
                 tool = await self.tool_groups_api.get_tool(tool_name)
                 if not tool:
                     raise ValueError(f"Tool {tool_name} not found")
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index d046057eb..9c491accb 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -232,9 +232,17 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_
 
     # Check that we got the content from our mocked tool execution result
     chunks = [chunk async for chunk in result]
-    assert len(chunks) > 0
-    assert chunks[0].response.output[0].type == "function_call"
-    assert chunks[0].response.output[0].name == "get_weather"
+    assert len(chunks) == 2  # Should have response.created and response.completed
+
+    # Check response.created event (should have empty output)
+    assert chunks[0].type == "response.created"
+    assert len(chunks[0].response.output) == 0
+
+    # Check response.completed event (should have the tool call)
+    assert chunks[1].type == "response.completed"
+    assert len(chunks[1].response.output) == 1
+    assert chunks[1].response.output[0].type == "function_call"
+    assert chunks[1].response.output[0].name == "get_weather"
 
 
 @pytest.mark.asyncio
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index e797bf14b..356e456e4 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -84,7 +84,7 @@ def test_response_streaming_basic(request, openai_client, model, provider, verif
         if chunk.type == "response.created":
             # Verify response.created is emitted first and immediately
             assert len(events) == 1, "response.created should be the first event"
-            assert event_times[0] < 0.1, "response.created should be emitted immediately (< 100ms)"
+            assert event_times[0] < 0.1, "response.created should be emitted immediately"
             assert chunk.response.status == "in_progress"
             response_id = chunk.response.id
 
@@ -109,11 +109,6 @@ def test_response_streaming_basic(request, openai_client, model, provider, verif
     completed_index = event_types.index("response.completed")
     assert created_index < completed_index, "response.created should come before response.completed"
 
-    # Verify timing - there should be some delay between events (indicating real processing)
-    if len(event_times) >= 2:
-        time_between_events = event_times[-1] - event_times[0]
-        assert time_between_events > 0.01, "There should be measurable time between events (indicating real processing)"
-
     # Verify stored response matches streamed response
     retrieved_response = openai_client.responses.retrieve(response_id=response_id)
     final_event = events[-1]