fix(vector store)!: fix file content API (#4105)
# What does this PR do?
- changed to match https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml

## Test Plan
Updated test, CI.
parent 4341c4c2ac
commit d4ecbfd092
8 changed files with 93 additions and 114 deletions
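For callers migrating across this breaking change, the sketch below contrasts the old and new response shapes of the retrieve-file-contents endpoint, based only on the fields renamed in the diff that follows. `response` stands in for whatever parsed object your client returns; the request itself is not shown and `extract_text` is a hypothetical helper.

# Minimal sketch of the breaking change for callers of the
# "retrieve vector store file contents" endpoint. `response` is assumed to be
# the already-parsed response object returned by your client of choice.
#
# Before (VectorStoreFileContentsResponse):
#   response.file_id     -> str
#   response.filename    -> str
#   response.attributes  -> dict
#   response.content     -> list[VectorStoreContent]
#
# After (VectorStoreFileContentResponse, an OpenAI-style page object):
#   response.object      -> "vector_store.file_content.page"
#   response.data        -> list[VectorStoreContent]
#   response.has_more    -> bool
#   response.next_page   -> str | None


def extract_text(response) -> list[str]:
    """Collect the text of each text-type content item from the new paged response."""
    assert response.object == "vector_store.file_content.page"
    return [item.text for item in response.data if item.type == "text"]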
@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -10465,41 +10465,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
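To make the schema change above concrete, the dict below is a hand-written example of a 200 response body that conforms to the new VectorStoreFileContentResponse shape; the chunk text is invented, and the VectorStoreContent items are assumed to carry `type` and `text` fields as exercised in the tests further down.

# Hand-written example of a 200 response body conforming to the new
# VectorStoreFileContentResponse schema (values invented for illustration).
example_file_content_page = {
    "object": "vector_store.file_content.page",  # const / default per the schema
    "data": [  # each item is a VectorStoreContent
        {"type": "text", "text": "First chunk of the parsed file."},
        {"type": "text", "text": "Second chunk of the parsed file."},
    ],
    "has_more": False,  # required: are more content pages available?
    "next_page": None,  # optional: token for the next page, if any
}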
docs/static/llama-stack-spec.yaml (vendored, 48 changed lines)
@@ -2913,11 +2913,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -9749,41 +9749,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
docs/static/stainless-llama-stack-spec.yaml (vendored, 48 changed lines)
@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -10465,41 +10465,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
@@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
 
 
 @json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
 
-    :param file_id: Unique identifier for the file
-    :param filename: Name of the file
-    :param attributes: Key-value attributes associated with the file
-    :param content: List of content items from the file
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
     """
 
-    file_id: str
-    filename: str
-    attributes: dict[str, Any]
-    content: list[VectorStoreContent]
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool
+    next_page: str | None = None
 
 
 @json_schema_type

@@ -732,12 +732,12 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.
 
         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :returns: A VectorStoreFileContentResponse representing the file contents.
         """
         ...
 
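A small sanity-check sketch for the new Pydantic model, assuming VectorStoreContent and VectorStoreFileContentResponse are exported from llama_stack.apis.vector_io as the import hunks below indicate; the chunk text is illustrative only.

# Sanity-check sketch (assumes these names are exported from the package,
# as the import hunks in this commit suggest).
from llama_stack.apis.vector_io import VectorStoreContent, VectorStoreFileContentResponse

page = VectorStoreFileContentResponse(
    data=[VectorStoreContent(type="text", text="hello from a parsed chunk")],  # illustrative chunk
    has_more=False,
)

# `object` is a Literal with a default and `next_page` defaults to None,
# so neither needs to be passed explicitly.
assert page.object == "vector_store.file_content.page"
assert page.next_page is None
print(page.model_dump_json(indent=2))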
@@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyStaticConfig,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFilesListInBatchResponse,

@@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
@@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,

@@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
@@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
     VectorStoreFileLastError,

@@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
 
-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
             content.extend(self._chunk_to_vector_store_content(chunk))
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            object="vector_store.file_content.page",
+            data=content,
+            has_more=False,
+            next_page=None,
         )
 
     async def openai_update_vector_store_file(
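The mixin above always returns a single page (has_more=False, next_page=None), but the new response shape allows pagination. Below is a defensive client-side sketch, where fetch_page is a hypothetical callable standing in for whatever client call retrieves one VectorStoreFileContentResponse page.

from collections.abc import Callable, Iterator
from typing import Any


def iter_file_content(fetch_page: Callable[[str | None], Any]) -> Iterator[Any]:
    """Yield VectorStoreContent items across pages.

    `fetch_page(page_token)` is a hypothetical helper that returns one
    VectorStoreFileContentResponse-shaped object. Against the reference mixin
    above it is called exactly once, since has_more is always False.
    """
    token: str | None = None
    while True:
        page = fetch_page(token)
        yield from page.data
        if not page.has_more:
            return
        token = page.next_page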
@@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
     )
 
     assert file_contents is not None
-    assert len(file_contents.content) == 1
-    content = file_contents.content[0]
+    assert file_contents.object == "vector_store.file_content.page"
+    assert len(file_contents.data) == 1
+    content = file_contents.data[0]
 
     # llama-stack-client returns a model, openai-python is a badboy and returns a dict
     if not isinstance(content, dict):
         content = content.model_dump()
     assert content["type"] == "text"
     assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
-    assert file_contents.attributes == attributes
+    assert file_contents.has_more is False
 
 
 @vector_provider_wrapper

@@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         )
 
         assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
-        assert len(file_contents.content) > 0
+        assert file_contents.object == "vector_store.file_content.page"
+        assert len(file_contents.data) > 0
 
         # Verify the content matches what we uploaded
         content_text = (
-            file_contents.content[0].text
-            if hasattr(file_contents.content[0], "text")
-            else file_contents.content[0]["text"]
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
         )
         assert file_data[i][1].decode("utf-8") in content_text
 