diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index d0f6dd6e3..b1326c06f 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -9,6 +9,7 @@ import base64
 import io
 import json
 import logging
+import mimetypes
 import time
 from typing import Any
 
@@ -19,7 +20,6 @@ from numpy.typing import NDArray
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.inference.inference import Inference
-from llama_stack.apis.tools.rag_tool import RAGDocument
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -40,8 +40,8 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
-    content_from_doc,
     make_overlapped_chunks,
+    parse_pdf,
 )
 
 from .config import FaissVectorIOConfig
@@ -292,20 +292,23 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         chunk_overlap_tokens = 400
 
         try:
+            file_response = await self.files_api.openai_retrieve_file(file_id)
+            mime_type, _ = mimetypes.guess_type(file_response.filename)
             content_response = await self.files_api.openai_retrieve_file_content(file_id)
-            content = content_response.body
-            doc = RAGDocument(
-                document_id=file_id,
-                content=content,
-                metadata=attributes,
-            )
-            content = await content_from_doc(doc)
+
+            # TODO: We can't use content_from_doc directly from vector_store
+            # but should figure out how to centralize this logic near there
+            if mime_type == "application/pdf":
+                content = parse_pdf(content_response.body)
+            else:
+                content = content_response.body.decode("utf-8")
+
             chunks = make_overlapped_chunks(
-                doc.document_id,
+                file_id,
                 content,
                 max_chunk_size_tokens,
                 chunk_overlap_tokens,
-                doc.metadata,
+                attributes,
             )
 
             if not chunks:
diff --git a/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf b/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf
new file mode 100644
index 000000000..25579f425
Binary files /dev/null and b/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf differ
diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
index 1ce25181e..4860715cf 100644
--- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
+++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml
@@ -39,7 +39,15 @@ test_response_file_search:
         input: "How many experts does the Llama 4 Maverick model have?"
         tools:
           - type: file_search
-            # vector_store_ids gets added by the test runner
+            # vector_store_ids param for file_search tool gets added by the test runner
+        file_content: "Llama 4 Maverick has 128 experts"
+        output: "128"
+      - case_id: "llama_experts_pdf"
+        input: "How many experts does the Llama 4 Maverick model have?"
+        tools:
+          - type: file_search
+            # vector_store_ids param for file_search tool gets added by the test runner
+        file_path: "pdfs/llama_stack_and_models.pdf"
         output: "128"
 
 test_response_mcp_tool:
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index f3e306e63..66eada4ba 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 import json
+import os
 import time
 
 import httpx
@@ -38,7 +39,7 @@ def _new_vector_store(openai_client, name):
     return vector_store
 
 
-def _new_file(openai_client, name, content, tmp_path):
+def _upload_file(openai_client, name, file_path):
     # Ensure we don't reuse an existing file
     files = openai_client.files.list()
     for file in files:
@@ -46,8 +47,6 @@ def _new_file(openai_client, name, content, tmp_path):
             openai_client.files.delete(file_id=file.id)
 
     # Upload a text file with our document content
-    file_path = tmp_path / name
-    file_path.write_text(content)
     return openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")
 
 
@@ -291,7 +290,7 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
     responses_test_cases["test_response_file_search"]["test_params"]["case"],
     ids=case_id_generator,
 )
-def test_response_non_streaming_file_search_simple_text(
+def test_response_non_streaming_file_search(
     request, openai_client, model, provider, verification_config, tmp_path, case
 ):
     if isinstance(openai_client, LlamaStackAsLibraryClient):
@@ -303,8 +302,17 @@ def test_response_non_streaming_file_search_simple_text(
 
     vector_store = _new_vector_store(openai_client, "test_vector_store")
 
-    file_content = "Llama 4 Maverick has 128 experts"
-    file_response = _new_file(openai_client, "test_response_non_streaming_file_search.txt", file_content, tmp_path)
+    if "file_content" in case:
+        file_name = "test_response_non_streaming_file_search.txt"
+        file_path = tmp_path / file_name
+        file_path.write_text(case["file_content"])
+    elif "file_path" in case:
+        file_path = os.path.join(os.path.dirname(__file__), "fixtures", case["file_path"])
+        file_name = os.path.basename(file_path)
+    else:
+        raise ValueError(f"No file content or path provided for case {case['case_id']}")
+
+    file_response = _upload_file(openai_client, file_name, file_path)
 
     # Attach our file to the vector store
     file_attach_response = openai_client.vector_stores.files.create(
@@ -343,7 +351,7 @@
     assert response.output[0].status == "completed"
     assert response.output[0].queries  # ensure it's some non-empty list
     assert response.output[0].results
-    assert response.output[0].results[0].text == file_content
+    assert case["output"].lower() in response.output[0].results[0].text.lower()
     assert response.output[0].results[0].score > 0
 
     # Verify the assistant response that summarizes the results
@@ -354,13 +362,8 @@
     assert case["output"].lower() in response.output_text.lower().strip()
 
 
-@pytest.mark.parametrize(
-    "case",
-    responses_test_cases["test_response_file_search"]["test_params"]["case"],
-    ids=case_id_generator,
-)
 def test_response_non_streaming_file_search_empty_vector_store(
-    request, openai_client, model, provider, verification_config, tmp_path, case
+    request, openai_client, model, provider, verification_config
 ):
     if isinstance(openai_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
@@ -371,17 +374,11 @@ def test_response_non_streaming_file_search_empty_vector_store(
 
     vector_store = _new_vector_store(openai_client, "test_vector_store")
 
-    # Update our tools with the right vector store id
-    tools = case["tools"]
-    for tool in tools:
-        if tool["type"] == "file_search":
-            tool["vector_store_ids"] = [vector_store.id]
-
     # Create the response request, which should query our vector store
     response = openai_client.responses.create(
         model=model,
-        input=case["input"],
-        tools=case["tools"],
+        input="How many experts does the Llama 4 Maverick model have?",
+        tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
        stream=False,
         include=["file_search_call.results"],
     )
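A minimal, self-contained sketch of the MIME-type dispatch the faiss.py hunk adds, assuming a hypothetical helper name extract_text and an injected pdf_parser callable (the actual patch branches inline and calls parse_pdf from llama_stack.providers.utils.memory.vector_store):

import mimetypes
from typing import Callable


def extract_text(filename: str, body: bytes, pdf_parser: Callable[[bytes], str]) -> str:
    """Guess the MIME type from the filename; route PDFs through the PDF parser
    and treat everything else as UTF-8 text, mirroring the branching in faiss.py."""
    mime_type, _ = mimetypes.guess_type(filename)
    if mime_type == "application/pdf":
        return pdf_parser(body)
    return body.decode("utf-8")


if __name__ == "__main__":
    # A .txt upload takes the plain-text branch.
    print(extract_text("notes.txt", b"Llama 4 Maverick has 128 experts", pdf_parser=lambda _: ""))
    # A .pdf filename is detected purely from its extension.
    print(mimetypes.guess_type("llama_stack_and_models.pdf"))  # ('application/pdf', None)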