From d4ecbfd092a7502b4b3ffffbbc3df75c8c38862d Mon Sep 17 00:00:00 2001 From: ehhuang Date: Mon, 10 Nov 2025 10:16:35 -0800 Subject: [PATCH] fix(vector store)!: fix file content API (#4105) # What does this PR do? - changed to match https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml ## Test Plan updated test CI --- client-sdks/stainless/openapi.yml | 48 ++++++++----------- docs/static/llama-stack-spec.yaml | 48 ++++++++----------- docs/static/stainless-llama-stack-spec.yaml | 48 ++++++++----------- src/llama_stack/apis/vector_io/vector_io.py | 24 +++++----- src/llama_stack/core/routers/vector_io.py | 4 +- .../core/routing_tables/vector_stores.py | 4 +- .../utils/memory/openai_vector_store_mixin.py | 15 +++--- .../vector_io/test_openai_vector_stores.py | 16 +++---- 8 files changed, 93 insertions(+), 114 deletions(-) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index d8159be62..adee2f086 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2916,11 +2916,11 @@ paths: responses: '200': description: >- - A list of InterleavedContent representing the file contents. + A VectorStoreFileContentResponse representing the file contents. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -10465,41 +10465,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. - VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ea7fd6eec..72600bf13 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2913,11 +2913,11 @@ paths: responses: '200': description: >- - A list of InterleavedContent representing the file contents. + A VectorStoreFileContentResponse representing the file contents. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -9749,41 +9749,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. - VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index d8159be62..adee2f086 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2916,11 +2916,11 @@ paths: responses: '200': description: >- - A list of InterleavedContent representing the file contents. + A VectorStoreFileContentResponse representing the file contents. content: application/json: schema: - $ref: '#/components/schemas/VectorStoreFileContentsResponse' + $ref: '#/components/schemas/VectorStoreFileContentResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -10465,41 +10465,35 @@ components: title: VectorStoreContent description: >- Content item from a vector store file or search result. - VectorStoreFileContentsResponse: + VectorStoreFileContentResponse: type: object properties: - file_id: + object: type: string - description: Unique identifier for the file - filename: - type: string - description: Name of the file - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object + const: vector_store.file_content.page + default: vector_store.file_content.page description: >- - Key-value attributes associated with the file - content: + The object type, which is always `vector_store.file_content.page` + data: type: array items: $ref: '#/components/schemas/VectorStoreContent' - description: List of content items from the file + description: Parsed content of the file + has_more: + type: boolean + description: >- + Indicates if there are more content pages to fetch + next_page: + type: string + description: The token for the next page, if any additionalProperties: false required: - - file_id - - filename - - attributes - - content - title: VectorStoreFileContentsResponse + - object + - data + - has_more + title: VectorStoreFileContentResponse description: >- - Response from retrieving the contents of a vector store file. + Represents the parsed content of a vector store file. OpenaiSearchVectorStoreRequest: type: object properties: diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 26c961db3..846c6f191 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel): @json_schema_type -class VectorStoreFileContentsResponse(BaseModel): - """Response from retrieving the contents of a vector store file. +class VectorStoreFileContentResponse(BaseModel): + """Represents the parsed content of a vector store file. - :param file_id: Unique identifier for the file - :param filename: Name of the file - :param attributes: Key-value attributes associated with the file - :param content: List of content items from the file + :param object: The object type, which is always `vector_store.file_content.page` + :param data: Parsed content of the file + :param has_more: Indicates if there are more content pages to fetch + :param next_page: The token for the next page, if any """ - file_id: str - filename: str - attributes: dict[str, Any] - content: list[VectorStoreContent] + object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" + data: list[VectorStoreContent] + has_more: bool + next_page: str | None = None @json_schema_type @@ -732,12 +732,12 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. :param vector_store_id: The ID of the vector store containing the file to retrieve. :param file_id: The ID of the file to retrieve. - :returns: A list of InterleavedContent representing the file contents. + :returns: A VectorStoreFileContentResponse representing the file contents. """ ... diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index b54217619..9dac461db 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategyStaticConfig, VectorStoreDeleteResponse, VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFilesListInBatchResponse, @@ -338,7 +338,7 @@ class VectorIORouter(VectorIO): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index c6c80a01e..f95a4dbe3 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import ( SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileDeleteResponse, VectorStoreFileObject, VectorStoreFileStatus, @@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index d047d9d12..86e6ea013 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import ( VectorStoreContent, VectorStoreDeleteResponse, VectorStoreFileBatchObject, - VectorStoreFileContentsResponse, + VectorStoreFileContentResponse, VectorStoreFileCounts, VectorStoreFileDeleteResponse, VectorStoreFileLastError, @@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, - ) -> VectorStoreFileContentsResponse: + ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) - file_info = await self._load_openai_vector_store_file(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: content.extend(self._chunk_to_vector_store_content(chunk)) - return VectorStoreFileContentsResponse( - file_id=file_id, - filename=file_info.get("filename", ""), - attributes=file_info.get("attributes", {}), - content=content, + return VectorStoreFileContentResponse( + object="vector_store.file_content.page", + data=content, + has_more=False, + next_page=None, ) async def openai_update_vector_store_file( diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 97ce4abe8..20f9d2978 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents( ) assert file_contents is not None - assert len(file_contents.content) == 1 - content = file_contents.content[0] + assert file_contents.object == "vector_store.file_content.page" + assert len(file_contents.data) == 1 + content = file_contents.data[0] # llama-stack-client returns a model, openai-python is a badboy and returns a dict if not isinstance(content, dict): content = content.model_dump() assert content["type"] == "text" assert content["text"] == test_content.decode("utf-8") - assert file_contents.filename == file_name - assert file_contents.attributes == attributes + assert file_contents.has_more is False @vector_provider_wrapper @@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents( ) assert file_contents is not None - assert file_contents.filename == file_data[i][0] - assert len(file_contents.content) > 0 + assert file_contents.object == "vector_store.file_content.page" + assert len(file_contents.data) > 0 # Verify the content matches what we uploaded content_text = ( - file_contents.content[0].text - if hasattr(file_contents.content[0], "text") - else file_contents.content[0]["text"] + file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"] ) assert file_data[i][1].decode("utf-8") in content_text