rebase and incorporate PR feedback

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Javier Arceo 2025-11-10 22:38:17 -05:00
parent 726bdc414d
commit 765f7623c1
7 changed files with 49 additions and 64 deletions

View file

@ -2691,16 +2691,8 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
<<<<<<< HEAD
<<<<<<< HEAD
A VectorStoreFileContentResponse representing the file contents.
=======
File contents, optionally with embeddings and metadata based on extra_query
=======
File contents, optionally with embeddings and metadata based on query File contents, optionally with embeddings and metadata based on query
>>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
parameters. parameters.
>>>>>>> 639f0daa (feat: Adding optional embeddings to content)
content: content:
application/json: application/json:
schema: schema:
@ -10132,11 +10124,10 @@ components:
items: items:
type: number type: number
description: >- description: >-
Optional embedding vector for this content chunk (when requested via extra_body) Optional embedding vector for this content chunk
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: Optional chunk metadata
Optional chunk metadata (when requested via extra_body)
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -10147,8 +10138,7 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: >- description: Optional user-defined metadata
Optional user-defined metadata (when requested via extra_body)
additionalProperties: false additionalProperties: false
required: required:
- type - type
@ -10172,6 +10162,7 @@ components:
description: Parsed content of the file description: Parsed content of the file
has_more: has_more:
type: boolean type: boolean
default: false
description: >- description: >-
Indicates if there are more content pages to fetch Indicates if there are more content pages to fetch
next_page: next_page:

View file

@ -2688,16 +2688,8 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
<<<<<<< HEAD
<<<<<<< HEAD
A VectorStoreFileContentResponse representing the file contents.
=======
File contents, optionally with embeddings and metadata based on extra_query
=======
File contents, optionally with embeddings and metadata based on query File contents, optionally with embeddings and metadata based on query
>>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
parameters. parameters.
>>>>>>> 639f0daa (feat: Adding optional embeddings to content)
content: content:
application/json: application/json:
schema: schema:
@ -9416,11 +9408,10 @@ components:
items: items:
type: number type: number
description: >- description: >-
Optional embedding vector for this content chunk (when requested via extra_body) Optional embedding vector for this content chunk
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: Optional chunk metadata
Optional chunk metadata (when requested via extra_body)
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -9431,8 +9422,7 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: >- description: Optional user-defined metadata
Optional user-defined metadata (when requested via extra_body)
additionalProperties: false additionalProperties: false
required: required:
- type - type
@ -9456,6 +9446,7 @@ components:
description: Parsed content of the file description: Parsed content of the file
has_more: has_more:
type: boolean type: boolean
default: false
description: >- description: >-
Indicates if there are more content pages to fetch Indicates if there are more content pages to fetch
next_page: next_page:

View file

@ -2691,16 +2691,8 @@ paths:
responses: responses:
'200': '200':
description: >- description: >-
<<<<<<< HEAD
<<<<<<< HEAD
A VectorStoreFileContentResponse representing the file contents.
=======
File contents, optionally with embeddings and metadata based on extra_query
=======
File contents, optionally with embeddings and metadata based on query File contents, optionally with embeddings and metadata based on query
>>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
parameters. parameters.
>>>>>>> 639f0daa (feat: Adding optional embeddings to content)
content: content:
application/json: application/json:
schema: schema:
@ -10132,11 +10124,10 @@ components:
items: items:
type: number type: number
description: >- description: >-
Optional embedding vector for this content chunk (when requested via extra_body) Optional embedding vector for this content chunk
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: Optional chunk metadata
Optional chunk metadata (when requested via extra_body)
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -10147,8 +10138,7 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: >- description: Optional user-defined metadata
Optional user-defined metadata (when requested via extra_body)
additionalProperties: false additionalProperties: false
required: required:
- type - type
@ -10172,6 +10162,7 @@ components:
description: Parsed content of the file description: Parsed content of the file
has_more: has_more:
type: boolean type: boolean
default: false
description: >- description: >-
Indicates if there are more content pages to fetch Indicates if there are more content pages to fetch
next_page: next_page:

View file

@ -224,9 +224,9 @@ class VectorStoreContent(BaseModel):
:param type: Content type, currently only "text" is supported :param type: Content type, currently only "text" is supported
:param text: The actual text content :param text: The actual text content
:param embedding: Optional embedding vector for this content chunk (when requested via extra_body) :param embedding: Optional embedding vector for this content chunk
:param chunk_metadata: Optional chunk metadata (when requested via extra_body) :param chunk_metadata: Optional chunk metadata
:param metadata: Optional user-defined metadata (when requested via extra_body) :param metadata: Optional user-defined metadata
""" """
type: Literal["text"] type: Literal["text"]
@ -286,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
deleted: bool = True deleted: bool = True
# Response model for one page of a vector store file's parsed content.
# It is the declared return type of the file-contents retrieval methods
# touched in this change.
# NOTE(review): the `:param` descriptions below match the `description:` text
# in the OpenAPI spec hunks of this change — presumably the spec is generated
# from this docstring, so regenerate the spec if the wording is edited.
@json_schema_type
class VectorStoreFileContentResponse(BaseModel):
"""Represents the parsed content of a vector store file.
:param object: The object type, which is always `vector_store.file_content.page`
:param data: Parsed content of the file
:param has_more: Indicates if there are more content pages to fetch
:param next_page: The token for the next page, if any
"""
# Single-valued Literal: the default is the only value the annotation permits.
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
# The only field without a default, so it must be supplied by the caller.
data: list[VectorStoreContent]
# Defaults to False; the spec hunks in this change add the matching `default: false`.
has_more: bool = False
# Defaults to None. NOTE(review): the one construction site visible in this
# change passes only `data=...`, so has_more/next_page keep their defaults
# there — confirm whether pagination is populated elsewhere.
next_page: str | None = None
@json_schema_type @json_schema_type
class VectorStoreChunkingStrategyAuto(BaseModel): class VectorStoreChunkingStrategyAuto(BaseModel):
"""Automatic chunking strategy for vector store files. """Automatic chunking strategy for vector store files.
@ -724,7 +740,7 @@ class VectorIO(Protocol):
file_id: str, file_id: str,
include_embeddings: Annotated[bool | None, Query(default=False)] = False, include_embeddings: Annotated[bool | None, Query(default=False)] = False,
include_metadata: Annotated[bool | None, Query(default=False)] = False, include_metadata: Annotated[bool | None, Query(default=False)] = False,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file. """Retrieves the contents of a vector store file.
:param vector_store_id: The ID of the vector store containing the file to retrieve. :param vector_store_id: The ID of the vector store containing the file to retrieve.

View file

@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategyStaticConfig, VectorStoreChunkingStrategyStaticConfig,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFilesListInBatchResponse, VectorStoreFilesListInBatchResponse,
@ -347,7 +347,7 @@ class VectorIORouter(VectorIO):
file_id: str, file_id: str,
include_embeddings: bool | None = False, include_embeddings: bool | None = False,
include_metadata: bool | None = False, include_metadata: bool | None = False,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
logger.debug( logger.debug(
f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, " f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
f"include_embeddings={include_embeddings}, include_metadata={include_metadata}" f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"

View file

@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
SearchRankingOptions, SearchRankingOptions,
VectorStoreChunkingStrategy, VectorStoreChunkingStrategy,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileObject, VectorStoreFileObject,
VectorStoreFileStatus, VectorStoreFileStatus,
@ -197,7 +197,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
file_id: str, file_id: str,
include_embeddings: bool | None = False, include_embeddings: bool | None = False,
include_metadata: bool | None = False, include_metadata: bool | None = False,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id) await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id) provider = await self.get_provider_impl(vector_store_id)

View file

@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
VectorStoreContent, VectorStoreContent,
VectorStoreDeleteResponse, VectorStoreDeleteResponse,
VectorStoreFileBatchObject, VectorStoreFileBatchObject,
VectorStoreFileContentsResponse, VectorStoreFileContentResponse,
VectorStoreFileCounts, VectorStoreFileCounts,
VectorStoreFileDeleteResponse, VectorStoreFileDeleteResponse,
VectorStoreFileLastError, VectorStoreFileLastError,
@ -704,7 +704,10 @@ class OpenAIVectorStoreMixin(ABC):
# Unknown filter type, default to no match # Unknown filter type, default to no match
raise ValueError(f"Unsupported filter type: {filter_type}") raise ValueError(f"Unsupported filter type: {filter_type}")
def _extract_chunk_fields(self, chunk: Chunk, include_embeddings: bool, include_metadata: bool) -> dict: def _chunk_to_vector_store_content(
self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
) -> list[VectorStoreContent]:
def extract_fields() -> dict:
"""Extract embedding and metadata fields from chunk based on include flags.""" """Extract embedding and metadata fields from chunk based on include flags."""
return { return {
"embedding": chunk.embedding if include_embeddings else None, "embedding": chunk.embedding if include_embeddings else None,
@ -712,10 +715,7 @@ class OpenAIVectorStoreMixin(ABC):
"metadata": chunk.metadata if include_metadata else None, "metadata": chunk.metadata if include_metadata else None,
} }
def _chunk_to_vector_store_content( fields = extract_fields()
self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
) -> list[VectorStoreContent]:
fields = self._extract_chunk_fields(chunk, include_embeddings, include_metadata)
if isinstance(chunk.content, str): if isinstance(chunk.content, str):
content_item = VectorStoreContent(type="text", text=chunk.content, **fields) content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
@ -923,7 +923,7 @@ class OpenAIVectorStoreMixin(ABC):
file_id: str, file_id: str,
include_embeddings: bool | None = False, include_embeddings: bool | None = False,
include_metadata: bool | None = False, include_metadata: bool | None = False,
) -> VectorStoreFileContentsResponse: ) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file.""" """Retrieves the contents of a vector store file."""
if vector_store_id not in self.openai_vector_stores: if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id) raise VectorStoreNotFoundError(vector_store_id)
@ -931,7 +931,6 @@ class OpenAIVectorStoreMixin(ABC):
# Parameters are already provided directly # Parameters are already provided directly
# include_embeddings and include_metadata are now function parameters # include_embeddings and include_metadata are now function parameters
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks] chunks = [Chunk.model_validate(c) for c in dict_chunks]
content = [] content = []
@ -941,11 +940,8 @@ class OpenAIVectorStoreMixin(ABC):
chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
) )
) )
return VectorStoreFileContentsResponse( return VectorStoreFileContentResponse(
file_id=file_id, data=content,
filename=file_info.get("filename", ""),
attributes=file_info.get("attributes", {}),
content=content,
) )
async def openai_update_vector_store_file( async def openai_update_vector_store_file(