Fix vector store file content response to use the paginated `vector_store.file_content.page` shape

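# What does this PR do?

Renames `VectorStoreFileContentsResponse` to `VectorStoreFileContentResponse` and reshapes it as a page object with the fields `object` (always `vector_store.file_content.page`), `data`, `has_more`, and `next_page`, replacing the previous `file_id`, `filename`, `attributes`, and `content` fields. The OpenAPI spec, the `VectorIO` protocol, `VectorIORouter`, `VectorStoresRoutingTable`, and `OpenAIVectorStoreMixin` are updated to match.
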
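## Test Plan

Updated `test_openai_vector_store_retrieve_file_contents` and `test_openai_vector_store_file_batch_retrieve_contents` to assert against the new response shape (`object`, `data`, `has_more`) instead of `content`, `filename`, and `attributes`.
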
This commit is contained in:
Eric Huang 2025-11-07 11:27:17 -08:00
parent a2c4c12384
commit 0d65bbc9e4
8 changed files with 7094 additions and 9192 deletions

View file

@ -2916,11 +2916,11 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
A list of InterleavedContent representing the file contents. Represents the parsed content of a vector store file.
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/VectorStoreFileContentsResponse' $ref: '#/components/schemas/VectorStoreFileContentResponse'
'400': '400':
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
'429': '429':
@ -10463,41 +10463,36 @@ components:
title: VectorStoreContent title: VectorStoreContent
description: >- description: >-
Content item from a vector store file or search result. Content item from a vector store file or search result.
VectorStoreFileContentsResponse: VectorStoreFileContentResponse:
type: object type: object
description: Represents the parsed content of a vector store file.
properties: properties:
file_id: object:
type: string type: string
description: Unique identifier for the file enum:
filename: - vector_store.file_content.page
type: string description: The object type, which is always `vector_store.file_content.page`
description: Name of the file x-stainless-const: true
attributes: data:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
Key-value attributes associated with the file
content:
type: array type: array
description: Parsed content of the file.
items: items:
$ref: '#/components/schemas/VectorStoreContent' $ref: '#/components/schemas/VectorStoreContent'
description: List of content items from the file has_more:
type: boolean
description: Indicates if there are more content pages to fetch.
next_page:
anyOf:
- type: string
description: The token for the next page, if any.
- type: 'null'
additionalProperties: false additionalProperties: false
required: required:
- file_id - object
- filename - data
- attributes - has_more
- content - next_page
title: VectorStoreFileContentsResponse title: VectorStoreFileContentResponse
description: >-
Response from retrieving the contents of a vector store file.
OpenaiSearchVectorStoreRequest: OpenaiSearchVectorStoreRequest:
type: object type: object
properties: properties:

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
@json_schema_type @json_schema_type
class VectorStoreFileContentsResponse(BaseModel): class VectorStoreFileContentResponse(BaseModel):
"""Response from retrieving the contents of a vector store file. """Represents the parsed content of a vector store file.
:param file_id: Unique identifier for the file :param object: The object type, which is always `vector_store.file_content.page`
:param filename: Name of the file :param data: Parsed content of the file
:param attributes: Key-value attributes associated with the file :param has_more: Indicates if there are more content pages to fetch
:param content: List of content items from the file :param next_page: The token for the next page, if any
""" """
file_id: str object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
filename: str data: list[VectorStoreContent]
attributes: dict[str, Any] has_more: bool
content: list[VectorStoreContent] next_page: str | None = None
@json_schema_type @json_schema_type
@ -732,12 +732,12 @@ class VectorIO(Protocol):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file. """Retrieves the contents of a vector store file.
:param vector_store_id: The ID of the vector store containing the file to retrieve. :param vector_store_id: The ID of the vector store containing the file to retrieve.
:param file_id: The ID of the file to retrieve. :param file_id: The ID of the file to retrieve.
:returns: A list of InterleavedContent representing the file contents. :returns: A VectorStoreFileContentResponse representing the file contents.
""" """
... ...

View file

@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategyStaticConfig, VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFilesListInBatchResponse, VectorStoreFilesListInBatchResponse,
@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id) provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents( return await provider.openai_retrieve_vector_store_file_contents(

View file

@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
SearchRankingOptions, SearchRankingOptions,
VectorStoreChunkingStrategy, VectorStoreChunkingStrategy,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFileStatus, VectorStoreFileStatus,
@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id) await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id) provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents( return await provider.openai_retrieve_vector_store_file_contents(

View file

@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
VectorStoreContent, VectorStoreContent,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileCounts, VectorStoreFileCounts,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileLastError, VectorStoreFileLastError,
@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
self, self,
vector_store_id: str, vector_store_id: str,
file_id: str, file_id: str,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file.""" """Retrieves the contents of a vector store file."""
if vector_store_id not in self.openai_vector_stores: if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks] chunks = [Chunk.model_validate(c) for c in dict_chunks]
content = [] content = []
for chunk in chunks: for chunk in chunks:
content.extend(self._chunk_to_vector_store_content(chunk)) content.extend(self._chunk_to_vector_store_content(chunk))
return VectorStoreFileContentsResponse( return VectorStoreFileContentResponse(
file_id=file_id, object="vector_store.file_content.page",
filename=file_info.get("filename", ""), data=content,
attributes=file_info.get("attributes", {}), has_more=False,
content=content, next_page=None,
) )
async def openai_update_vector_store_file( async def openai_update_vector_store_file(

View file

@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
) )
assert file_contents is not None assert file_contents is not None
assert len(file_contents.content) == 1 assert file_contents.object == "vector_store.file_content.page"
content = file_contents.content[0] assert len(file_contents.data) == 1
content = file_contents.data[0]
# llama-stack-client returns a model, openai-python is a badboy and returns a dict # llama-stack-client returns a model, openai-python is a badboy and returns a dict
if not isinstance(content, dict): if not isinstance(content, dict):
content = content.model_dump() content = content.model_dump()
assert content["type"] == "text" assert content["type"] == "text"
assert content["text"] == test_content.decode("utf-8") assert content["text"] == test_content.decode("utf-8")
assert file_contents.filename == file_name assert file_contents.has_more is False
assert file_contents.attributes == attributes
@vector_provider_wrapper @vector_provider_wrapper
@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
) )
assert file_contents is not None assert file_contents is not None
assert file_contents.filename == file_data[i][0] assert file_contents.object == "vector_store.file_content.page"
assert len(file_contents.content) > 0 assert len(file_contents.data) > 0
# Verify the content matches what we uploaded # Verify the content matches what we uploaded
content_text = ( content_text = (
file_contents.content[0].text file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
if hasattr(file_contents.content[0], "text")
else file_contents.content[0]["text"]
) )
assert file_data[i][1].decode("utf-8") in content_text assert file_data[i][1].decode("utf-8") in content_text