diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index e9dae4bd0..1be4af6c9 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2691,16 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
             File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
             application/json:
               schema:
@@ -10132,11 +10124,10 @@ components:
           items:
             type: number
           description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
         metadata:
           type: object
           additionalProperties:
@@ -10147,8 +10138,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type
@@ -10172,6 +10162,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index 8cf1d1c89..66eda78c7 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -2688,16 +2688,8 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
             File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
             application/json:
               schema:
@@ -9416,11 +9408,10 @@ components:
           items:
             type: number
           description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
         metadata:
           type: object
           additionalProperties:
@@ -9431,8 +9422,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type
@@ -9456,6 +9446,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index e9dae4bd0..1be4af6c9 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -2691,16 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-<<<<<<< HEAD
-<<<<<<< HEAD
-            A VectorStoreFileContentResponse representing the file contents.
-=======
-            File contents, optionally with embeddings and metadata based on extra_query
-=======
             File contents, optionally with embeddings and metadata based on query
->>>>>>> c192529c (use FastAPI Query class instead of custom middlware)
             parameters.
->>>>>>> 639f0daa (feat: Adding optional embeddings to content)
           content:
             application/json:
               schema:
@@ -10132,11 +10124,10 @@ components:
           items:
             type: number
           description: >-
-            Optional embedding vector for this content chunk (when requested via extra_body)
+            Optional embedding vector for this content chunk
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
-          description: >-
-            Optional chunk metadata (when requested via extra_body)
+          description: Optional chunk metadata
         metadata:
           type: object
           additionalProperties:
@@ -10147,8 +10138,7 @@ components:
              - type: string
              - type: array
              - type: object
-          description: >-
-            Optional user-defined metadata (when requested via extra_body)
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type
@@ -10172,6 +10162,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py
index fdc7a36db..699241128 100644
--- a/src/llama_stack/apis/vector_io/vector_io.py
+++ b/src/llama_stack/apis/vector_io/vector_io.py
@@ -224,9 +224,9 @@ class VectorStoreContent(BaseModel):
 
     :param type: Content type, currently only "text" is supported
     :param text: The actual text content
-    :param embedding: Optional embedding vector for this content chunk (when requested via extra_body)
-    :param chunk_metadata: Optional chunk metadata (when requested via extra_body)
-    :param metadata: Optional user-defined metadata (when requested via extra_body)
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
     """
 
     type: Literal["text"]
@@ -286,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
     deleted: bool = True
 
 
+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
+
+
 @json_schema_type
 class VectorStoreChunkingStrategyAuto(BaseModel):
     """Automatic chunking strategy for vector store files.
@@ -724,7 +740,7 @@ class VectorIO(Protocol):
         file_id: str,
         include_embeddings: Annotated[bool | None, Query(default=False)] = False,
         include_metadata: Annotated[bool | None, Query(default=False)] = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.
 
         :param vector_store_id: The ID of the vector store containing the file to retrieve.
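Reviewer note: a minimal sketch (not part of the patch) that exercises the new `VectorStoreFileContentResponse` model in isolation. It assumes an editable install of llama-stack so the imports resolve, and that the three optional fields on `VectorStoreContent` default to `None` (their docstrings call them optional, but the defaults are not visible in this hunk). Field names and defaults otherwise come straight from the model definition above.

```python
# Sketch only: confirm the serialized page shape described in the spec
# diffs, using the model added in src/llama_stack/apis/vector_io/vector_io.py.
from llama_stack.apis.vector_io.vector_io import (
    VectorStoreContent,
    VectorStoreFileContentResponse,
)

page = VectorStoreFileContentResponse(
    data=[VectorStoreContent(type="text", text="hello world")],
)

# `object` is pinned to "vector_store.file_content.page", `has_more`
# defaults to False, and `next_page` defaults to None.
print(page.model_dump_json(indent=2))
```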
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index 15c7bb5d0..ed5fb8253 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -24,7 +24,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyStaticConfig,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFilesListInBatchResponse,
@@ -347,7 +347,7 @@ class VectorIORouter(VectorIO):
         file_id: str,
         include_embeddings: bool | None = False,
         include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         logger.debug(
             f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
             f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index 649df934e..e77739abe 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -15,7 +15,7 @@ from llama_stack.apis.vector_io.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
     VectorStoreFileStatus,
@@ -197,7 +197,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         file_id: str,
         include_embeddings: bool | None = False,
         include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
 
diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 3777088de..853245598 100644
--- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -30,7 +30,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreContent,
     VectorStoreDeleteResponse,
     VectorStoreFileBatchObject,
-    VectorStoreFileContentsResponse,
+    VectorStoreFileContentResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
     VectorStoreFileLastError,
@@ -704,18 +704,18 @@ class OpenAIVectorStoreMixin(ABC):
             # Unknown filter type, default to no match
             raise ValueError(f"Unsupported filter type: {filter_type}")
 
-    def _extract_chunk_fields(self, chunk: Chunk, include_embeddings: bool, include_metadata: bool) -> dict:
-        """Extract embedding and metadata fields from chunk based on include flags."""
-        return {
-            "embedding": chunk.embedding if include_embeddings else None,
-            "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
-            "metadata": chunk.metadata if include_metadata else None,
-        }
-
     def _chunk_to_vector_store_content(
         self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
     ) -> list[VectorStoreContent]:
-        fields = self._extract_chunk_fields(chunk, include_embeddings, include_metadata)
+        def extract_fields() -> dict:
+            """Extract embedding and metadata fields from chunk based on include flags."""
+            return {
+                "embedding": chunk.embedding if include_embeddings else None,
+                "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
+                "metadata": chunk.metadata if include_metadata else None,
+            }
+
+        fields = extract_fields()
 
         if isinstance(chunk.content, str):
             content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
@@ -923,7 +923,7 @@ class OpenAIVectorStoreMixin(ABC):
         file_id: str,
         include_embeddings: bool | None = False,
         include_metadata: bool | None = False,
-    ) -> VectorStoreFileContentsResponse:
+    ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
@@ -931,7 +931,6 @@
 
         # Parameters are already provided directly
         # include_embeddings and include_metadata are now function parameters
-        file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
@@ -941,11 +940,8 @@
                     chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
                 )
             )
-        return VectorStoreFileContentsResponse(
-            file_id=file_id,
-            filename=file_info.get("filename", ""),
-            attributes=file_info.get("attributes", {}),
-            content=content,
+        return VectorStoreFileContentResponse(
+            data=content,
         )
 
     async def openai_update_vector_store_file(
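End to end, the behavioral change is that `include_embeddings` and `include_metadata` now travel as ordinary query parameters (via FastAPI's `Query`) rather than `extra_body` passthroughs, and the endpoint returns the page-style object defined above. A rough sketch of what a raw HTTP caller would see, assuming a locally running stack; the base URL, port, route shape, and IDs are illustrative placeholders inferred from the spec diffs, not verified against a running server.

```python
# Sketch only: hypothetical raw-HTTP call against a local llama-stack
# server. URL, port, and IDs below are illustrative placeholders.
import httpx

resp = httpx.get(
    "http://localhost:8321/v1/vector_stores/vs_123/files/file_456/content",
    params={"include_embeddings": True, "include_metadata": True},
)
resp.raise_for_status()
page = resp.json()

# Page-style response: object / data / has_more / next_page.
for item in page["data"]:
    print(item["text"][:60], "has embedding:", item.get("embedding") is not None)
if page["has_more"]:
    print("more pages available; next token:", page["next_page"])
```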