feat: support filters in file search (#2472)

# What does this PR do? Switch the file search tool in Responses to use vector_stores.search, which supports filters. Closes #2435 ## Test Plan Added an e2e test with filters. myenv ❯ llama stack run llama_stack/templates/fireworks/run.yaml pytest -sv tests/verifications/openai_api/test_responses.py \ -k 'file_search and filters' \ --base-url=http://localhost:8321/v1/openai/v1 \ --model=meta-llama/Llama-3.3-70B-Instruct
2025-06-27 18:50:41 +00:00 · 2025-06-18 21:50:55 -07:00 · 2025-06-18 21:50:55 -07:00 · db2cd9e8f3
commit db2cd9e8f3
parent fd37a50e6a
13 changed files with 449 additions and 63 deletions
--- a/llama_stack/apis/agents/openai_responses.py
+++ b/llama_stack/apis/agents/openai_responses.py
@ -9,6 +9,7 @@ from typing import Annotated, Any, Literal
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict

+from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
 from llama_stack.schema_utils import json_schema_type, register_schema

 # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably
@ -400,11 +401,6 @@ class OpenAIResponseInputToolFunction(BaseModel):
    strict: bool | None = None


-class FileSearchRankingOptions(BaseModel):
-    ranker: str | None = None
-    score_threshold: float | None = Field(default=0.0, ge=0.0, le=1.0)
-
-
@json_schema_type
 class OpenAIResponseInputToolFileSearch(BaseModel):
    type: Literal["file_search"] = "file_search"
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -157,6 +157,11 @@ VectorStoreChunkingStrategy = Annotated[
 register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")


+class SearchRankingOptions(BaseModel):
+    ranker: str | None = None
+    score_threshold: float | None = Field(default=0.0, ge=0.0, le=1.0)
+
+
@json_schema_type
 class VectorStoreFileLastError(BaseModel):
    code: Literal["server_error"] | Literal["rate_limit_exceeded"]
@ -319,7 +324,7 @@ class VectorIO(Protocol):
        query: str | list[str],
        filters: dict[str, Any] | None = None,
        max_num_results: int | None = 10,
-        ranking_options: dict[str, Any] | None = None,
+        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
    ) -> VectorStoreSearchResponsePage:
        """Search for chunks in a vector store.