diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
index 310a88298..aa223c261 100644
--- a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
@@ -18,9 +18,11 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
 )
 from llama_stack.apis.inference import (
@@ -156,6 +158,13 @@ async def convert_response_input_to_chat_messages(
         ):
             # these are handled by the responses impl itself and not pass through to chat completions
             pass
+        elif isinstance(input_item, OpenAIResponseOutputMessageWebSearchToolCall):
+            # web search tool calls are metadata only and don't need to be converted to chat messages
+            pass
+        elif isinstance(input_item, OpenAIResponseOutputMessageFileSearchToolCall):
+            # file search tool calls are metadata only and don't need to be converted to chat messages
+            # OpenAI re-executes file search on each turn rather than caching results
+            pass
         else:
             content = await convert_response_content_to_chat_content(input_item.content)
             message_type = await get_message_type_by_role(input_item.role)
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index f23734892..1d668ba20 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -127,6 +127,60 @@ def test_response_non_streaming_file_search_empty_vector_store(compat_client, te
     assert response.output_text
 
 
+def test_response_file_search_with_previous_response_id(compat_client, text_model_id, tmp_path):
+    """Test that file_search works with previous_response_id without throwing AttributeError."""
+    if isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("Responses API file search is not yet supported in library client.")
+
+    vector_store = new_vector_store(compat_client, "test_vector_store")
+
+    # Create a test file with some content
+    file_name = "test_file_search_with_previous_response.txt"
+    file_path = tmp_path / file_name
+    file_content = "The Llama 4 Maverick model has 128 experts."
+    file_path.write_text(file_content)
+
+    # Upload the file and attach to vector store
+    file_response = upload_file(compat_client, file_name, file_path)
+    compat_client.vector_stores.files.create(
+        vector_store_id=vector_store.id,
+        file_id=file_response.id,
+    )
+    wait_for_file_attachment(compat_client, vector_store.id, file_response.id)
+
+    # First response with file_search
+    tools = [{"type": "file_search", "vector_store_ids": [vector_store.id]}]
+    response1 = compat_client.responses.create(
+        model=text_model_id,
+        input="How many experts does the Llama 4 Maverick model have?",
+        tools=tools,
+        stream=False,
+        include=["file_search_call.results"],
+    )
+
+    # Verify first response succeeded
+    assert len(response1.output) > 1
+    assert response1.output[0].type == "file_search_call"
+    assert response1.output[0].status == "completed"
+    assert response1.output_text
+
+    # Second response with previous_response_id - this is the key test for the fix
+    # This should NOT throw AttributeError about 'content'
+    response2 = compat_client.responses.create(
+        model=text_model_id,
+        input="What else can you tell me about this model?",
+        tools=tools,
+        stream=False,
+        previous_response_id=response1.id,
+    )
+
+    # Verify second response succeeded
+    assert response2.output
+    assert response2.output_text
+    # Verify the previous_response_id was captured
+    assert response2.previous_response_id == response1.id
+
+
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
 def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
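Below is a minimal, self-contained sketch of the dispatch pattern the utils.py change relies on. The classes and function here are hypothetical stand-ins, not the real llama_stack Pydantic models or converters; they only mirror the relevant attributes. The point being illustrated: web_search/file_search call items are metadata only and carry no .content, so when a prior turn is replayed via previous_response_id they must be skipped explicitly rather than falling through to the message-conversion branch that reads item.content.

from dataclasses import dataclass


@dataclass
class FakeMessage:
    role: str
    content: str


@dataclass
class FakeWebSearchToolCall:
    # Metadata-only item: deliberately has no .content attribute, which is what
    # made the old fallthrough branch raise AttributeError.
    id: str
    status: str


def convert_items(items: list) -> list[dict]:
    """Convert input items to chat messages, skipping metadata-only tool calls."""
    messages = []
    for item in items:
        if isinstance(item, FakeWebSearchToolCall):
            # Mirrors the new elif branches in the patch: nothing to convert.
            continue
        messages.append({"role": item.role, "content": item.content})
    return messages


print(convert_items([FakeMessage("user", "hi"), FakeWebSearchToolCall("ws_1", "completed")]))
# -> [{'role': 'user', 'content': 'hi'}]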