Mirror of https://github.com/meta-llama/llama-stack.git
Add pdf support to file_search for Responses API
This adds basic PDF support (using our existing `parse_pdf` function) to the file_search tool and the corresponding Vector Store Files API. When a PDF file is uploaded and attached to a vector store, we parse the PDF and then chunk its content as usual. This is not the best long-term solution, but it matches what we've been doing so far for PDF files in the memory tool.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent 57eccf023d
commit 055885bd5a

4 changed files with 41 additions and 33 deletions
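
For context, here is a minimal sketch of how the new code path gets exercised end to end from an OpenAI-compatible client. The base URL, API key, model name, and local file name are illustrative placeholders, not taken from this commit; the API calls mirror the ones used in the verification tests below.

```python
from openai import OpenAI

# Illustrative endpoint and credentials; point this at your Llama Stack
# server's OpenAI-compatible API.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

# Upload a PDF via the Files API.
pdf = client.files.create(file=open("llama_stack_and_models.pdf", "rb"), purpose="assistants")

# Attaching the file to a vector store is what triggers the new path in this
# commit: the provider sniffs the MIME type from the filename, runs PDFs
# through parse_pdf, and chunks the extracted text as usual.
vector_store = client.vector_stores.create(name="docs")
client.vector_stores.files.create(vector_store_id=vector_store.id, file_id=pdf.id)

# Query the store through the Responses API file_search tool.
response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # illustrative model name
    input="How many experts does the Llama 4 Maverick model have?",
    tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
    include=["file_search_call.results"],
)
print(response.output_text)
```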
Faiss vector IO provider:

@@ -9,6 +9,7 @@ import base64
 import io
 import json
 import logging
+import mimetypes
 import time
 from typing import Any

@@ -19,7 +20,6 @@ from numpy.typing import NDArray
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.inference.inference import Inference
-from llama_stack.apis.tools.rag_tool import RAGDocument
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
@@ -40,8 +40,8 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
 from llama_stack.providers.utils.memory.vector_store import (
     EmbeddingIndex,
     VectorDBWithIndex,
-    content_from_doc,
     make_overlapped_chunks,
+    parse_pdf,
 )

 from .config import FaissVectorIOConfig
@@ -292,20 +292,23 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         chunk_overlap_tokens = 400

         try:
+            file_response = await self.files_api.openai_retrieve_file(file_id)
+            mime_type, _ = mimetypes.guess_type(file_response.filename)
             content_response = await self.files_api.openai_retrieve_file_content(file_id)
-            content = content_response.body
-            doc = RAGDocument(
-                document_id=file_id,
-                content=content,
-                metadata=attributes,
-            )
-            content = await content_from_doc(doc)
+
+            # TODO: We can't use content_from_doc directly from vector_store
+            # but should figure out how to centralize this logic near there
+            if mime_type == "application/pdf":
+                content = parse_pdf(content_response.body)
+            else:
+                content = content_response.body.decode("utf-8")

             chunks = make_overlapped_chunks(
-                doc.document_id,
+                file_id,
                 content,
                 max_chunk_size_tokens,
                 chunk_overlap_tokens,
-                doc.metadata,
+                attributes,
             )

             if not chunks:
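
The TODO in the hunk above notes that this MIME-type dispatch really belongs next to `content_from_doc` in the shared `vector_store` utilities. Below is a minimal sketch of what a centralized helper could look like; the helper name and the assumption that `parse_pdf` takes raw bytes and returns extracted text are mine, not part of this commit.

```python
import mimetypes

from llama_stack.providers.utils.memory.vector_store import parse_pdf


def content_from_file_body(filename: str, body: bytes) -> str:
    """Hypothetical shared helper mirroring the dispatch added in this commit.

    PDF bytes go through parse_pdf; anything else is treated as UTF-8 text.
    """
    mime_type, _ = mimetypes.guess_type(filename)
    if mime_type == "application/pdf":
        return parse_pdf(body)
    return body.decode("utf-8")
```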
New PDF test fixture (referenced as `pdfs/llama_stack_and_models.pdf` by the test cases below): binary file not shown.
Responses API test cases (YAML fixture):

@@ -39,7 +39,15 @@ test_response_file_search:
         input: "How many experts does the Llama 4 Maverick model have?"
         tools:
           - type: file_search
-            # vector_store_ids gets added by the test runner
+            # vector_store_ids param for file_search tool gets added by the test runner
+        file_content: "Llama 4 Maverick has 128 experts"
+        output: "128"
+      - case_id: "What is the "
+        input: "How many experts does the Llama 4 Maverick model have?"
+        tools:
+          - type: file_search
+            # vector_store_ids param for file_search tool gets added by the test runner
+        file_path: "pdfs/llama_stack_and_models.pdf"
         output: "128"

 test_response_mcp_tool:
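
The `responses_test_cases` and `case_id_generator` names used by the tests below presumably come from loading this YAML fixture. Here is a rough sketch of that pattern, assuming the fixture path and a pyyaml-based loader (neither is shown in this commit):

```python
import os

import pytest
import yaml

# Assumed location of the fixture relative to the test module.
_FIXTURE_PATH = os.path.join(os.path.dirname(__file__), "fixtures", "test_cases", "responses.yaml")

with open(_FIXTURE_PATH) as f:
    responses_test_cases = yaml.safe_load(f)


def case_id_generator(case):
    # Use the case_id from the YAML, when present, as the pytest test id.
    return case.get("case_id")


@pytest.mark.parametrize(
    "case",
    responses_test_cases["test_response_file_search"]["test_params"]["case"],
    ids=case_id_generator,
)
def test_case_shape(case):
    # Each case must supply either inline file content or a fixture file path.
    assert "file_content" in case or "file_path" in case
```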
Responses API verification tests:

@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import json
+import os
 import time

 import httpx
@@ -38,7 +39,7 @@ def _new_vector_store(openai_client, name):
     return vector_store


-def _new_file(openai_client, name, content, tmp_path):
+def _upload_file(openai_client, name, file_path):
     # Ensure we don't reuse an existing file
     files = openai_client.files.list()
     for file in files:
@@ -46,8 +47,6 @@ def _new_file(openai_client, name, content, tmp_path):
             openai_client.files.delete(file_id=file.id)

     # Upload a text file with our document content
-    file_path = tmp_path / name
-    file_path.write_text(content)
     return openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")


@@ -291,7 +290,7 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid
     responses_test_cases["test_response_file_search"]["test_params"]["case"],
     ids=case_id_generator,
 )
-def test_response_non_streaming_file_search_simple_text(
+def test_response_non_streaming_file_search(
     request, openai_client, model, provider, verification_config, tmp_path, case
 ):
     if isinstance(openai_client, LlamaStackAsLibraryClient):
@@ -303,8 +302,17 @@ def test_response_non_streaming_file_search_simple_text(

     vector_store = _new_vector_store(openai_client, "test_vector_store")

-    file_content = "Llama 4 Maverick has 128 experts"
-    file_response = _new_file(openai_client, "test_response_non_streaming_file_search.txt", file_content, tmp_path)
+    if "file_content" in case:
+        file_name = "test_response_non_streaming_file_search.txt"
+        file_path = tmp_path / file_name
+        file_path.write_text(case["file_content"])
+    elif "file_path" in case:
+        file_path = os.path.join(os.path.dirname(__file__), "fixtures", case["file_path"])
+        file_name = os.path.basename(file_path)
+    else:
+        raise ValueError(f"No file content or path provided for case {case['case_id']}")
+
+    file_response = _upload_file(openai_client, file_name, file_path)

     # Attach our file to the vector store
     file_attach_response = openai_client.vector_stores.files.create(
@@ -343,7 +351,7 @@ def test_response_non_streaming_file_search_simple_text(
     assert response.output[0].status == "completed"
     assert response.output[0].queries  # ensure it's some non-empty list
     assert response.output[0].results
-    assert response.output[0].results[0].text == file_content
+    assert case["output"].lower() in response.output[0].results[0].text.lower()
     assert response.output[0].results[0].score > 0

     # Verify the assistant response that summarizes the results
@@ -354,13 +362,8 @@ def test_response_non_streaming_file_search_simple_text(
     assert case["output"].lower() in response.output_text.lower().strip()


-@pytest.mark.parametrize(
-    "case",
-    responses_test_cases["test_response_file_search"]["test_params"]["case"],
-    ids=case_id_generator,
-)
 def test_response_non_streaming_file_search_empty_vector_store(
-    request, openai_client, model, provider, verification_config, tmp_path, case
+    request, openai_client, model, provider, verification_config
 ):
     if isinstance(openai_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
@@ -371,17 +374,11 @@ def test_response_non_streaming_file_search_empty_vector_store(

     vector_store = _new_vector_store(openai_client, "test_vector_store")

-    # Update our tools with the right vector store id
-    tools = case["tools"]
-    for tool in tools:
-        if tool["type"] == "file_search":
-            tool["vector_store_ids"] = [vector_store.id]
-
     # Create the response request, which should query our vector store
     response = openai_client.responses.create(
         model=model,
-        input=case["input"],
-        tools=case["tools"],
+        input="How many experts does the Llama 4 Maverick model have?",
+        tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}],
         stream=False,
         include=["file_search_call.results"],
     )