From d4ecbfd092a7502b4b3ffffbbc3df75c8c38862d Mon Sep 17 00:00:00 2001
From: ehhuang <ehhuang@users.noreply.github.com>
Date: Mon, 10 Nov 2025 10:16:35 -0800
Subject: [PATCH] fix(vector store)!: fix file content API (#4105)

# What does this PR do?
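- Changed the vector store file content response to match the OpenAI API spec
  (https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml).
- Renamed `VectorStoreFileContentsResponse` to `VectorStoreFileContentResponse`
  and replaced its `file_id`, `filename`, `attributes`, and `content` fields
  with `object`, `data`, `has_more`, and `next_page`. This is a breaking change
  for clients that read the old fields.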

## Test Plan
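Updated the integration tests run in CI
(`tests/integration/vector_io/test_openai_vector_stores.py`) to check the new
response shape (`object`, `data`, `has_more`).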
---
 client-sdks/stainless/openapi.yml             | 48 ++++++++-----------
 docs/static/llama-stack-spec.yaml             | 48 ++++++++-----------
 docs/static/stainless-llama-stack-spec.yaml   | 48 ++++++++-----------
 src/llama_stack/apis/vector_io/vector_io.py   | 24 +++++-----
 src/llama_stack/core/routers/vector_io.py     |  4 +-
 .../core/routing_tables/vector_stores.py      |  4 +-
 .../utils/memory/openai_vector_store_mixin.py | 15 +++---
 .../vector_io/test_openai_vector_stores.py    | 16 +++----
 8 files changed, 93 insertions(+), 114 deletions(-)

diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index d8159be62..adee2f086 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':
@@ -10465,41 +10465,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index ea7fd6eec..72600bf13 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -2913,11 +2913,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':
@@ -9749,41 +9749,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index d8159be62..adee2f086 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':
@@ -10465,41 +10465,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:
diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py
index 26c961db3..846c6f191 100644
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
 
 
 @json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
 
-    :param file_id: Unique identifier for the file
-    :param filename: Name of the file
-    :param attributes: Key-value attributes associated with the file
-    :param content: List of content items from the file
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
     """
 
-    file_id: str
-    filename: str
-    attributes: dict[str, Any]
-    content: list[VectorStoreContent]
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool
+    next_page: str | None = None
 
 
 @json_schema_type
@@ -732,12 +732,12 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.
 
         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :returns: A VectorStoreFileContentResponse representing the file contents.
         """
         ...
 
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index b54217619..9dac461db 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyStaticConfig,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFilesListInBatchResponse,
@@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index c6c80a01e..f95a4dbe3 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,
@@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index d047d9d12..86e6ea013 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
     VectorStoreFileLastError,
@@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
 
-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
             content.extend(self._chunk_to_vector_store_content(chunk))
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            object="vector_store.file_content.page",
+            data=content,
+            has_more=False,
+            next_page=None,
         )
 
     async def openai_update_vector_store_file(
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 97ce4abe8..20f9d2978 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
     )
 
     assert file_contents is not None
-    assert len(file_contents.content) == 1
-    content = file_contents.content[0]
+    assert file_contents.object == "vector_store.file_content.page"
+    assert len(file_contents.data) == 1
+    content = file_contents.data[0]
 
     # llama-stack-client returns a model, openai-python is a badboy and returns a dict
     if not isinstance(content, dict):
         content = content.model_dump()
     assert content["type"] == "text"
     assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
-    assert file_contents.attributes == attributes
+    assert file_contents.has_more is False
 
 
 @vector_provider_wrapper
@@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         )
 
         assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
-        assert len(file_contents.content) > 0
+        assert file_contents.object == "vector_store.file_content.page"
+        assert len(file_contents.data) > 0
 
         # Verify the content matches what we uploaded
         content_text = (
-            file_contents.content[0].text
-            if hasattr(file_contents.content[0], "text")
-            else file_contents.content[0]["text"]
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
         )
         assert file_data[i][1].decode("utf-8") in content_text