feat: update search for vector_stores (#2441)

Updated the response returned by the `search` functionality to match OpenAI's format.

## Test Plan

```
pytest -sv --stack-config=http://localhost:8321 tests/integration/vector_io/test_openai_vector_stores.py --embedding-model all-MiniLM-L6-v2
```
2025-12-03 09:53:45 +00:00 · 2025-06-12 15:34:22 -07:00 · 2025-06-12 15:34:22 -07:00 · 0bc1747ed8
commit 0bc1747ed8
parent 35c2817d0a
9 changed files with 236 additions and 106 deletions
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -8,7 +8,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any, Protocol, runtime_checkable
+from typing import Any, Literal, Protocol, runtime_checkable

 from pydantic import BaseModel, Field

@ -96,13 +96,30 @@ class VectorStoreSearchRequest(BaseModel):
    rewrite_query: bool = False


+@json_schema_type
+class VectorStoreContent(BaseModel):
+    type: Literal["text"]
+    text: str
+
+
@json_schema_type
 class VectorStoreSearchResponse(BaseModel):
    """Response from searching a vector store."""

+    file_id: str
+    filename: str
+    score: float
+    attributes: dict[str, str | float | bool] | None = None
+    content: list[VectorStoreContent]
+
+
+@json_schema_type
+class VectorStoreSearchResponsePage(BaseModel):
+    """Response from searching a vector store."""
+
    object: str = "vector_store.search_results.page"
    search_query: str
-    data: list[dict[str, Any]]
+    data: list[VectorStoreSearchResponse]
    has_more: bool = False
    next_page: str | None = None

@ -259,7 +276,7 @@ class VectorIO(Protocol):
        max_num_results: int | None = 10,
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        """Search for chunks in a vector store.

        Searches a vector store for relevant chunks based on a query and optional file attribute filters.
--- a/llama_stack/distribution/routers/vector_io.py
+++ b/llama_stack/distribution/routers/vector_io.py
@ -17,7 +17,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreDeleteResponse,
    VectorStoreListResponse,
    VectorStoreObject,
-    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import RoutingTable
@ -242,7 +242,7 @@ class VectorIORouter(VectorIO):
        max_num_results: int | None = 10,
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
        # Route based on vector store ID
        provider = self.routing_table.get_provider_impl(vector_store_id)
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@ -21,7 +21,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreDeleteResponse,
    VectorStoreListResponse,
    VectorStoreObject,
-    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
@ -239,5 +239,5 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@ -23,7 +23,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreDeleteResponse,
    VectorStoreListResponse,
    VectorStoreObject,
-    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
@ -237,7 +237,7 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")


--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -21,7 +21,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreDeleteResponse,
    VectorStoreListResponse,
    VectorStoreObject,
-    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
@ -239,5 +239,5 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@ -13,10 +13,12 @@ from typing import Any
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
    QueryChunksResponse,
+    VectorStoreContent,
    VectorStoreDeleteResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )

 logger = logging.getLogger(__name__)
@ -85,7 +87,6 @@ class OpenAIVectorStoreMixin(ABC):
        provider_vector_db_id: str | None = None,
    ) -> VectorStoreObject:
        """Creates a vector store."""
-        print("IN OPENAI VECTOR STORE MIXIN, openai_create_vector_store")
        # store and vector_db have the same id
        store_id = name or str(uuid.uuid4())
        created_at = int(time.time())
@ -281,7 +282,7 @@ class OpenAIVectorStoreMixin(ABC):
        ranking_options: dict[str, Any] | None = None,
        rewrite_query: bool | None = False,
        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
-    ) -> VectorStoreSearchResponse:
+    ) -> VectorStoreSearchResponsePage:
        """Search for chunks in a vector store."""
        # TODO: Add support in the API for this
        search_mode = "vector"
@ -312,7 +313,7 @@ class OpenAIVectorStoreMixin(ABC):

            # Convert response to OpenAI format
            data = []
-            for i, (chunk, score) in enumerate(zip(response.chunks, response.scores, strict=False)):
+            for chunk, score in zip(response.chunks, response.scores, strict=False):
                # Apply score based filtering
                if score < score_threshold:
                    continue
@ -323,18 +324,46 @@ class OpenAIVectorStoreMixin(ABC):
                    if not self._matches_filters(chunk.metadata, filters):
                        continue

-                chunk_data = {
-                    "id": f"chunk_{i}",
-                    "object": "vector_store.search_result",
-                    "score": score,
-                    "content": chunk.content.content if hasattr(chunk.content, "content") else str(chunk.content),
-                    "metadata": chunk.metadata,
-                }
-                data.append(chunk_data)
+                # content is InterleavedContent
+                if isinstance(chunk.content, str):
+                    content = [
+                        VectorStoreContent(
+                            type="text",
+                            text=chunk.content,
+                        )
+                    ]
+                elif isinstance(chunk.content, list):
+                    # TODO: Add support for other types of content
+                    content = [
+                        VectorStoreContent(
+                            type="text",
+                            text=item.text,
+                        )
+                        for item in chunk.content
+                        if item.type == "text"
+                    ]
+                else:
+                    if chunk.content.type != "text":
+                        raise ValueError(f"Unsupported content type: {chunk.content.type}")
+                    content = [
+                        VectorStoreContent(
+                            type="text",
+                            text=chunk.content.text,
+                        )
+                    ]
+
+                response_data_item = VectorStoreSearchResponse(
+                    file_id=chunk.metadata.get("file_id", ""),
+                    filename=chunk.metadata.get("filename", ""),
+                    score=score,
+                    attributes=chunk.metadata,
+                    content=content,
+                )
+                data.append(response_data_item)
                if len(data) >= max_num_results:
                    break

-            return VectorStoreSearchResponse(
+            return VectorStoreSearchResponsePage(
                search_query=search_query,
                data=data,
                has_more=False,  # For simplicity, we don't implement pagination here
@ -344,7 +373,7 @@ class OpenAIVectorStoreMixin(ABC):
        except Exception as e:
            logger.error(f"Error searching vector store {vector_store_id}: {e}")
            # Return empty results on error
-            return VectorStoreSearchResponse(
+            return VectorStoreSearchResponsePage(
                search_query=search_query,
                data=[],
                has_more=False,