mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-03 19:57:35 +00:00)
fix(responses): .content error when resuming responses with file search
# What does this PR do?

## Test Plan
parent 42414a1a1b
commit 64c22735d0

2 changed files with 63 additions and 0 deletions
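For context, here is a minimal sketch of the client-side call pattern that triggers the bug this commit fixes: create a response with a `file_search` tool attached, then create a follow-up response that resumes it via `previous_response_id`. The client setup, base URL, model name, and vector store id below are placeholder assumptions and not part of this change; the regression test added at the bottom of the diff exercises the same flow through the test fixtures.

```python
# Illustrative sketch only: assumes an OpenAI-compatible client pointed at a
# running Llama Stack server; base_url, api_key, model, and vector store id
# are placeholders, not values taken from this diff.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
tools = [{"type": "file_search", "vector_store_ids": ["<vector-store-id>"]}]

# First turn: file_search runs and the tool call is recorded as an output item.
first = client.responses.create(
    model="<text-model-id>",
    input="How many experts does the Llama 4 Maverick model have?",
    tools=tools,
)

# Second turn: resuming via previous_response_id replays the stored output items,
# including the file_search tool call. Before this fix, converting that item back
# into chat messages read `.content`, which the item does not have, raising
# AttributeError.
second = client.responses.create(
    model="<text-model-id>",
    input="What else can you tell me about this model?",
    tools=tools,
    previous_response_id=first.id,
)
print(second.output_text)
```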
```diff
@@ -18,9 +18,11 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
 )
 from llama_stack.apis.inference import (
```
```diff
@@ -156,6 +158,13 @@ async def convert_response_input_to_chat_messages(
         ):
             # these are handled by the responses impl itself and not pass through to chat completions
             pass
+        elif isinstance(input_item, OpenAIResponseOutputMessageWebSearchToolCall):
+            # web search tool calls are metadata only and don't need to be converted to chat messages
+            pass
+        elif isinstance(input_item, OpenAIResponseOutputMessageFileSearchToolCall):
+            # file search tool calls are metadata only and don't need to be converted to chat messages
+            # OpenAI re-executes file search on each turn rather than caching results
+            pass
         else:
             content = await convert_response_content_to_chat_content(input_item.content)
             message_type = await get_message_type_by_role(input_item.role)
```
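Mechanically, the error comes from the dispatch above: when a stored response is replayed, `convert_response_input_to_chat_messages` walks the previous output items, and any item type without its own branch falls through to the `else` clause, which reads `input_item.content`. File search (and web search) tool-call items carry tool metadata rather than message content, so that attribute access raised `AttributeError`. Below is a stripped-down sketch of the dispatch using simplified stand-in types rather than the real Pydantic models; it only illustrates the control flow, not the actual API.

```python
# Simplified stand-ins for the real models; only the fields needed to show the
# dispatch are included.
from dataclasses import dataclass


@dataclass
class Message:
    role: str
    content: str


@dataclass
class FileSearchToolCall:  # metadata-only item: note there is no `content` field
    id: str
    status: str


def convert(items):
    chat_messages = []
    for item in items:
        if isinstance(item, FileSearchToolCall):
            # With the fix: tool-call metadata is skipped; file search is
            # re-executed on the next turn rather than replayed from cache.
            continue
        # Without the dedicated branch, a FileSearchToolCall would land here
        # and `item.content` would raise AttributeError.
        chat_messages.append({"role": item.role, "content": item.content})
    return chat_messages


print(convert([
    FileSearchToolCall(id="fs_1", status="completed"),
    Message(role="user", content="What else can you tell me?"),
]))
```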
```diff
@@ -127,6 +127,60 @@ def test_response_non_streaming_file_search_empty_vector_store(compat_client, te
     assert response.output_text
 
 
+def test_response_file_search_with_previous_response_id(compat_client, text_model_id, tmp_path):
+    """Test that file_search works with previous_response_id without throwing AttributeError."""
+    if isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("Responses API file search is not yet supported in library client.")
+
+    vector_store = new_vector_store(compat_client, "test_vector_store")
+
+    # Create a test file with some content
+    file_name = "test_file_search_with_previous_response.txt"
+    file_path = tmp_path / file_name
+    file_content = "The Llama 4 Maverick model has 128 experts."
+    file_path.write_text(file_content)
+
+    # Upload the file and attach to vector store
+    file_response = upload_file(compat_client, file_name, file_path)
+    compat_client.vector_stores.files.create(
+        vector_store_id=vector_store.id,
+        file_id=file_response.id,
+    )
+    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+
+    # First response with file_search
+    tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}]
+    response1 = compat_client.responses.create(
+        model=text_model_id,
+        input="How many experts does the Llama 4 Maverick model have?",
+        tools=tools,
+        stream=False,
+        include=["file_search_call.results"],
+    )
+
+    # Verify first response succeeded
+    assert len(response1.output) > 1
+    assert response1.output[0].type == "file_search_call"
+    assert response1.output[0].status == "completed"
+    assert response1.output_text
+
+    # Second response with previous_response_id - this is the key test for the fix
+    # This should NOT throw AttributeError about 'content'
+    response2 = compat_client.responses.create(
+        model=text_model_id,
+        input="What else can you tell me about this model?",
+        tools=tools,
+        stream=False,
+        previous_response_id=response1.id,
+    )
+
+    # Verify second response succeeded
+    assert response2.output
+    assert response2.output_text
+    # Verify the previous_response_id was captured
+    assert response2.previous_response_id == response1.id
+
+
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
 def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
```