file content fix

# What does this PR do?

Renames `VectorStoreFileContentsResponse` to `VectorStoreFileContentResponse` and reshapes it to the OpenAI-style file content page format: the `file_id`, `filename`, `attributes`, and `content` fields are replaced by `object` (always `vector_store.file_content.page`), `data` (a list of `VectorStoreContent` items), `has_more`, and `next_page`. The generated OpenAPI specs, the `VectorIO` protocol, `VectorIORouter`, `VectorStoresRoutingTable`, `OpenAIVectorStoreMixin`, and the integration tests are updated to the new name and shape.
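For reviewers, a file-contents response under the new schema looks roughly like this (values are illustrative; only the field names and the `object` constant come from the schema):

```json
{
  "object": "vector_store.file_content.page",
  "data": [
    {
      "type": "text",
      "text": "First parsed chunk of the uploaded document."
    }
  ],
  "has_more": false,
  "next_page": null
}
```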
## Test Plan
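Presumably exercised by the updated integration tests, e.g. `pytest -sv tests/integration/vector_io/ -k "retrieve_file_contents or file_batch_retrieve_contents"` against a running stack (test directory assumed from the repo layout; adjust to the local setup).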
Eric Huang 2025-11-07 11:28:42 -08:00
parent a2c4c12384
commit 51714b4160
8 changed files with 93 additions and 114 deletions


@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -10463,41 +10463,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:


@@ -2913,11 +2913,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -9747,41 +9747,35 @@ components:
      title: VectorStoreContent
      description: >-
        Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
      type: object
      properties:
-        file_id:
+        object:
          type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
          description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
          type: array
          items:
            $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
      additionalProperties: false
      required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
      description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
      type: object
      properties:


@@ -2916,11 +2916,11 @@ paths:
       responses:
         '200':
           description: >-
-            A list of InterleavedContent representing the file contents.
+            A VectorStoreFileContentResponse representing the file contents.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VectorStoreFileContentsResponse'
+                $ref: '#/components/schemas/VectorStoreFileContentResponse'
         '400':
           $ref: '#/components/responses/BadRequest400'
         '429':

@@ -10463,41 +10463,35 @@ components:
       title: VectorStoreContent
       description: >-
         Content item from a vector store file or search result.
-    VectorStoreFileContentsResponse:
+    VectorStoreFileContentResponse:
       type: object
       properties:
-        file_id:
+        object:
           type: string
-          description: Unique identifier for the file
-        filename:
-          type: string
-          description: Name of the file
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
+          const: vector_store.file_content.page
+          default: vector_store.file_content.page
           description: >-
-            Key-value attributes associated with the file
-        content:
+            The object type, which is always `vector_store.file_content.page`
+        data:
           type: array
           items:
             $ref: '#/components/schemas/VectorStoreContent'
-          description: List of content items from the file
+          description: Parsed content of the file
+        has_more:
+          type: boolean
+          description: >-
+            Indicates if there are more content pages to fetch
+        next_page:
+          type: string
+          description: The token for the next page, if any
       additionalProperties: false
       required:
-        - file_id
-        - filename
-        - attributes
-        - content
-      title: VectorStoreFileContentsResponse
+        - object
+        - data
+        - has_more
+      title: VectorStoreFileContentResponse
       description: >-
-        Response from retrieving the contents of a vector store file.
+        Represents the parsed content of a vector store file.
     OpenaiSearchVectorStoreRequest:
       type: object
       properties:


@@ -396,19 +396,19 @@ class VectorStoreListFilesResponse(BaseModel):
 @json_schema_type
-class VectorStoreFileContentsResponse(BaseModel):
-    """Response from retrieving the contents of a vector store file.
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.

-    :param file_id: Unique identifier for the file
-    :param filename: Name of the file
-    :param attributes: Key-value attributes associated with the file
-    :param content: List of content items from the file
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
     """

-    file_id: str
-    filename: str
-    attributes: dict[str, Any]
-    content: list[VectorStoreContent]
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool
+    next_page: str | None = None


 @json_schema_type

@@ -732,12 +732,12 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.

         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A list of InterleavedContent representing the file contents.
+        :returns: A VectorStoreFileContentResponse representing the file contents.
         """
         ...
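As a quick orientation for API consumers, a minimal sketch of building and serializing the renamed model (the import path and the `VectorStoreContent` fields are taken from this diff; the chunk text is made up):

```python
from llama_stack.apis.vector_io import VectorStoreContent, VectorStoreFileContentResponse

# A single-page response, mirroring what providers now return.
page = VectorStoreFileContentResponse(
    data=[VectorStoreContent(type="text", text="chunk text parsed from the uploaded file")],
    has_more=False,
    next_page=None,
)

# `object` is a Literal with a default, so callers never need to pass it explicitly.
assert page.object == "vector_store.file_content.page"
print(page.model_dump_json(indent=2))
```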


@@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyStaticConfig,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFilesListInBatchResponse,

@@ -338,7 +338,7 @@ class VectorIORouter(VectorIO):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(


@@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,

@@ -195,7 +195,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(


@@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
     VectorStoreFileLastError,

@@ -921,22 +921,21 @@ class OpenAIVectorStoreMixin(ABC):
         self,
         vector_store_id: str,
         file_id: str,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
             content.extend(self._chunk_to_vector_store_content(chunk))
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            object="vector_store.file_content.page",
+            data=content,
+            has_more=False,
+            next_page=None,
         )

     async def openai_update_vector_store_file(
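With `has_more` and `next_page` in the response, callers can already be written pagination-aware, even though this implementation returns everything as a single page with `has_more=False`. A rough sketch, where `fetch_page` is a hypothetical async wrapper around the retrieve call:

```python
from collections.abc import Awaitable, Callable

from llama_stack.apis.vector_io import VectorStoreFileContentResponse


async def read_all_text(
    fetch_page: Callable[[str | None], Awaitable[VectorStoreFileContentResponse]],
) -> list[str]:
    """Collect the text of every content page; fetch_page maps a page token to the next page."""
    texts: list[str] = []
    token: str | None = None
    while True:
        page = await fetch_page(token)
        texts.extend(item.text for item in page.data if item.type == "text")
        if not page.has_more:
            return texts
        token = page.next_page
```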


@@ -907,16 +907,16 @@ def test_openai_vector_store_retrieve_file_contents(
     )
     assert file_contents is not None
-    assert len(file_contents.content) == 1
-    content = file_contents.content[0]
+    assert file_contents.object == "vector_store.file_content.page"
+    assert len(file_contents.data) == 1
+    content = file_contents.data[0]
     # llama-stack-client returns a model, openai-python is a badboy and returns a dict
     if not isinstance(content, dict):
         content = content.model_dump()
     assert content["type"] == "text"
     assert content["text"] == test_content.decode("utf-8")
-    assert file_contents.filename == file_name
-    assert file_contents.attributes == attributes
+    assert file_contents.has_more is False


 @vector_provider_wrapper

@@ -1483,14 +1483,12 @@ def test_openai_vector_store_file_batch_retrieve_contents(
         )
         assert file_contents is not None
-        assert file_contents.filename == file_data[i][0]
-        assert len(file_contents.content) > 0
+        assert file_contents.object == "vector_store.file_content.page"
+        assert len(file_contents.data) > 0
         # Verify the content matches what we uploaded
         content_text = (
-            file_contents.content[0].text
-            if hasattr(file_contents.content[0], "text")
-            else file_contents.content[0]["text"]
+            file_contents.data[0].text if hasattr(file_contents.data[0], "text") else file_contents.data[0]["text"]
         )
         assert file_data[i][1].decode("utf-8") in content_text