rebase and incorporate PR feedback

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-12-03 01:48:05 +00:00 · 2025-11-10 22:38:17 -05:00 · 2025-11-10 22:38:17 -05:00 · 765f7623c1
commit 765f7623c1
parent 726bdc414d
7 changed files with 49 additions and 64 deletions
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@ -2691,16 +2691,8 @@ paths:
      responses:
        '200':
          description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
            File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
            parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
          content:
            application/json:
              schema:
@ -10132,11 +10124,10 @@ components:
          items:
            type: number
          description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
        metadata:
          type: object
          additionalProperties:
@ -10147,8 +10138,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
      additionalProperties: false
      required:
        - type
@ -10172,6 +10162,7 @@ components:
          description: Parsed content of the file
        has_more:
          type: boolean
+          default: false
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@ -2688,16 +2688,8 @@ paths:
      responses:
        '200':
          description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
            File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
            parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
          content:
            application/json:
              schema:
@ -9416,11 +9408,10 @@ components:
          items:
            type: number
          description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
        metadata:
          type: object
          additionalProperties:
@ -9431,8 +9422,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
      additionalProperties: false
      required:
        - type
@ -9456,6 +9446,7 @@ components:
          description: Parsed content of the file
        has_more:
          type: boolean
+          default: false
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@ -2691,16 +2691,8 @@ paths:
      responses:
        '200':
          description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
            File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
            parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
          content:
            application/json:
              schema:
@ -10132,11 +10124,10 @@ components:
          items:
            type: number
          description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
        metadata:
          type: object
          additionalProperties:
@ -10147,8 +10138,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
      additionalProperties: false
      required:
        - type
@ -10172,6 +10162,7 @@ components:
          description: Parsed content of the file
        has_more:
          type: boolean
+          default: false
          description: >-
            Indicates if there are more content pages to fetch
        next_page:
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@ -224,9 +224,9 @@ class VectorStoreContent(BaseModel):

    :param type: Content type, currently only "text" is supported
    :param text: The actual text content
-    :param embedding: Optional embedding vector for this content chunk (when requested via extra_body)
-    :param chunk_metadata: Optional chunk metadata (when requested via extra_body)
-    :param metadata: Optional user-defined metadata (when requested via extra_body)
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
    """

    type: Literal["text"]
@ -286,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
    deleted: bool = True


+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
+
+
@json_schema_type
 class VectorStoreChunkingStrategyAuto(BaseModel):
    """Automatic chunking strategy for vector store files.
@ -724,7 +740,7 @@ class VectorIO(Protocol):
        file_id: str,
        include_embeddings: Annotated[bool | None, Query(default=False)] = False,
        include_metadata: Annotated[bool | None, Query(default=False)] = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file.

        :param vector_store_id: The ID of the vector store containing the file to retrieve.
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreChunkingStrategyStaticConfig,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFilesListInBatchResponse,
@ -347,7 +347,7 @@ class VectorIORouter(VectorIO):
        file_id: str,
        include_embeddings: bool | None = False,
        include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        logger.debug(
            f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
            f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
    SearchRankingOptions,
    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileDeleteResponse,
    VectorStoreFileObject,
    VectorStoreFileStatus,
@ -197,7 +197,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
        file_id: str,
        include_embeddings: bool | None = False,
        include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        await self.assert_action_allowed("read", "vector_store", vector_store_id)

        provider = await self.get_provider_impl(vector_store_id)
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
    VectorStoreContent,
    VectorStoreDeleteResponse,
    VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
    VectorStoreFileLastError,
@ -704,18 +704,18 @@ class OpenAIVectorStoreMixin(ABC):
            # Unknown filter type, default to no match
            raise ValueError(f"Unsupported filter type: {filter_type}")

-    def _extract_chunk_fields(self, chunk: Chunk, include_embeddings: bool, include_metadata: bool) -> dict:
-        """Extract embedding and metadata fields from chunk based on include flags."""
-        return {
-            "embedding": chunk.embedding if include_embeddings else None,
-            "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
-            "metadata": chunk.metadata if include_metadata else None,
-        }
-
    def _chunk_to_vector_store_content(
        self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
    ) -> list[VectorStoreContent]:
-        fields = self._extract_chunk_fields(chunk, include_embeddings, include_metadata)
+        def extract_fields() -> dict:
+            """Extract embedding and metadata fields from chunk based on include flags."""
+            return {
+                "embedding": chunk.embedding if include_embeddings else None,
+                "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
+                "metadata": chunk.metadata if include_metadata else None,
+            }
+
+        fields = extract_fields()

        if isinstance(chunk.content, str):
            content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
@ -923,7 +923,7 @@ class OpenAIVectorStoreMixin(ABC):
        file_id: str,
        include_embeddings: bool | None = False,
        include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
        """Retrieves the contents of a vector store file."""
        if vector_store_id not in self.openai_vector_stores:
            raise VectorStoreNotFoundError(vector_store_id)
@ -931,7 +931,6 @@ class OpenAIVectorStoreMixin(ABC):
        # Parameters are already provided directly
        # include_embeddings and include_metadata are now function parameters

-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
        dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
        chunks = [Chunk.model_validate(c) for c in dict_chunks]
        content = []
@ -941,11 +940,8 @@ class OpenAIVectorStoreMixin(ABC):
                    chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
                )
            )
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            data=content,
        )

    async def openai_update_vector_store_file(