feat: allow returning embeddings and metadata from /vector_stores/ methods; disallow changing Provider ID (#4046)
# What does this PR do?
- Updates `/vector_stores/{vector_store_id}/files/{file_id}/content` to allow returning `embeddings` and `metadata` via the `extra_query` parameter.
- Updates the UI accordingly to display them.
- Updates the UI to support CRUD operations in the Vector Stores section and adds a new modal exposing the functionality.
- Makes vector store updates fail if a user tries to change the Provider ID (which doesn't make sense to allow).
```python
In [1]: client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
extra_query={"include_embeddings": True, "include_metadata": True}
)
Out[1]: FileContentResponse(
    attributes={},
    content=[
        Content(
            text='This is a test document to check if embeddings are generated properly.\n',
            type='text',
            embedding=[0.33760684728622437, ...],
            chunk_metadata={
                'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3',
                'document_id': 'file-27291dbc679642ac94ffac6d2810c339',
                'source': None,
                'created_timestamp': 1762053437,
                'updated_timestamp': 1762053437,
                'chunk_window': '0-13',
                'chunk_tokenizer': 'DEFAULT_TIKTOKEN_TOKENIZER',
                'chunk_embedding_model': 'sentence-transformers/nomic-ai/nomic-embed-text-v1.5',
                'chunk_embedding_dimension': 768,
                'content_token_count': 13,
                'metadata_token_count': 9,
            },
            metadata={
                'filename': 'test-embedding.txt',
                'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3',
                'document_id': 'file-27291dbc679642ac94ffac6d2810c339',
                'token_count': 13,
                'metadata_token_count': 9,
            },
        )
    ],
    file_id='file-27291dbc679642ac94ffac6d2810c339',
    filename='test-embedding.txt',
)
```
Screenshots of the UI are shown below:
### List Vector Store with Added "Create New Vector Store"
<img width="1912" height="491" alt="Screenshot 2025-11-06 at 10 47
25 PM"
src="https://github.com/user-attachments/assets/a3a3ddd9-758d-4005-ac9c-5047f03916f3"
/>
### Create New Vector Store
<img width="1918" height="1048" alt="Screenshot 2025-11-06 at 10 47
49 PM"
src="https://github.com/user-attachments/assets/b4dc0d31-696f-4e68-b109-27915090f158"
/>
### Edit Vector Store
<img width="1916" height="1355" alt="Screenshot 2025-11-06 at 10 48
32 PM"
src="https://github.com/user-attachments/assets/ec879c63-4cf7-489f-bb1e-57ccc7931414"
/>
### Vector Store Files Contents page (with Embeddings)
<img width="1914" height="849" alt="Screenshot 2025-11-06 at 11 54
32 PM"
src="https://github.com/user-attachments/assets/3095520d-0e90-41f7-83bd-652f6c3fbf27"
/>
### Vector Store Files Contents Details page (with Embeddings)
<img width="1916" height="1221" alt="Screenshot 2025-11-06 at 11 55
00 PM"
src="https://github.com/user-attachments/assets/e71dbdc5-5b49-472b-a43a-5785f58d196c"
/>
## Test Plan
Tests added for the middleware `extra_query` extension and for Provider ID update failures.
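A minimal sketch of the provider-failure case (the `client` and `vector_store` fixtures and the test name here are illustrative assumptions, not the PR's actual test code):

```python
import pytest


# Hypothetical fixtures: `client` is a started library client, `vector_store`
# is a store created earlier in the test. Both names are assumptions.
async def test_update_cannot_change_provider_id(client, vector_store):
    # The router raises ValueError when metadata tries to change provider_id.
    with pytest.raises(ValueError, match="provider_id cannot be changed"):
        await client.vector_stores.update(
            vector_store_id=vector_store.id,
            metadata={"provider_id": "some-other-provider"},
        )
```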
---------
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
Parent: 37853ca558 · Commit: eb3f9ac278
17 changed files with 1161 additions and 125 deletions
```diff
@@ -10,7 +10,7 @@
 # the root directory of this source tree.
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable
 
-from fastapi import Body
+from fastapi import Body, Query
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.tracing import telemetry_traceable
```
```diff
@@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel):
 
     :param type: Content type, currently only "text" is supported
     :param text: The actual text content
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
     """
 
     type: Literal["text"]
     text: str
+    embedding: list[float] | None = None
+    chunk_metadata: ChunkMetadata | None = None
+    metadata: dict[str, Any] | None = None
 
 
 @json_schema_type
```
```diff
@@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
     deleted: bool = True
 
 
+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
+
+
 @json_schema_type
 class VectorStoreChunkingStrategyAuto(BaseModel):
     """Automatic chunking strategy for vector store files.
```
```diff
@@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel):
     has_more: bool = False
 
 
-@json_schema_type
-class VectorStoreFileContentResponse(BaseModel):
-    """Represents the parsed content of a vector store file.
-
-    :param object: The object type, which is always `vector_store.file_content.page`
-    :param data: Parsed content of the file
-    :param has_more: Indicates if there are more content pages to fetch
-    :param next_page: The token for the next page, if any
-    """
-
-    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
-    data: list[VectorStoreContent]
-    has_more: bool
-    next_page: str | None = None
-
-
 @json_schema_type
 class VectorStoreFileDeleteResponse(BaseModel):
     """Response from deleting a vector store file.
```
```diff
@@ -732,12 +738,16 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: Annotated[bool | None, Query(default=False)] = False,
+        include_metadata: Annotated[bool | None, Query(default=False)] = False,
     ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.
 
         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A VectorStoreFileContentResponse representing the file contents.
+        :param include_embeddings: Whether to include embedding vectors in the response.
+        :param include_metadata: Whether to include chunk metadata in the response.
+        :returns: File contents, optionally with embeddings and metadata based on query parameters.
         """
         ...
 
```
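For context on the `Annotated[..., Query(...)]` pattern used in the protocol above, here is a minimal standalone FastAPI sketch (not part of this PR) showing how such parameters surface as optional query string values:

```python
from typing import Annotated

from fastapi import FastAPI, Query

app = FastAPI()


@app.get("/vector_stores/{vector_store_id}/files/{file_id}/content")
async def file_content(
    vector_store_id: str,
    file_id: str,
    # Reachable as ?include_embeddings=true&include_metadata=true; both default to false.
    include_embeddings: Annotated[bool | None, Query()] = False,
    include_metadata: Annotated[bool | None, Query()] = False,
) -> dict:
    return {"include_embeddings": include_embeddings, "include_metadata": include_metadata}
```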
```diff
@@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
         body |= path_params
 
+        # Pass through params that aren't already handled as path params
+        if options.params:
+            extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
+            if extra_query_params:
+                body["extra_query"] = extra_query_params
+
         body, field_names = self._handle_file_uploads(options, body)
 
         body = self._convert_body(matched_func, body, exclude_params=set(field_names))
```
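The passthrough rule is easiest to see in isolation. A toy rerun of the logic above, with assumed example values (in the real client these come from the matched route and the request options):

```python
# Assumed inputs for illustration only.
path_params = {"vector_store_id": "vs_123", "file_id": "file_456"}
options_params = {"file_id": "file_456", "include_embeddings": True, "include_metadata": True}

body: dict = {}
body |= path_params

# Anything not consumed as a path param is forwarded under "extra_query".
extra_query_params = {k: v for k, v in options_params.items() if k not in path_params}
if extra_query_params:
    body["extra_query"] = extra_query_params

print(body["extra_query"])  # {'include_embeddings': True, 'include_metadata': True}
```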
```diff
@@ -247,6 +247,13 @@ class VectorIORouter(VectorIO):
         metadata: dict[str, Any] | None = None,
     ) -> VectorStoreObject:
         logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
+
+        # Check if provider_id is being changed (not supported)
+        if metadata and "provider_id" in metadata:
+            current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
+            if current_store and current_store.provider_id != metadata["provider_id"]:
+                raise ValueError("provider_id cannot be changed after vector store creation")
+
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_update_vector_store(
             vector_store_id=vector_store_id,
```
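From the caller's side, the new guard looks roughly like this (a sketch; with the in-process library client the router's `ValueError` surfaces directly, while over HTTP it would arrive as an error response):

```python
try:
    client.vector_stores.update(
        vector_store_id=vector_store.id,
        metadata={"provider_id": "a-different-provider"},  # changing provider_id is rejected
    )
except ValueError as e:
    print(e)  # provider_id cannot be changed after vector store creation
```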
```diff
@@ -338,12 +345,19 @@
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
-        logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
-        provider = await self.routing_table.get_provider_impl(vector_store_id)
-        return await provider.openai_retrieve_vector_store_file_contents(
+        logger.debug(
+            f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
+            f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
+        )
+
+        return await self.routing_table.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
+            include_embeddings=include_embeddings,
+            include_metadata=include_metadata,
         )
 
     async def openai_update_vector_store_file(
```
```diff
@@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
+
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
+            include_embeddings=include_embeddings,
+            include_metadata=include_metadata,
         )
 
     async def openai_update_vector_store_file(
```
```diff
@@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC):
             # Unknown filter type, default to no match
             raise ValueError(f"Unsupported filter type: {filter_type}")
 
-    def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
-        # content is InterleavedContent
+    def _chunk_to_vector_store_content(
+        self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
+    ) -> list[VectorStoreContent]:
+        def extract_fields() -> dict:
+            """Extract embedding and metadata fields from chunk based on include flags."""
+            return {
+                "embedding": chunk.embedding if include_embeddings else None,
+                "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
+                "metadata": chunk.metadata if include_metadata else None,
+            }
+
+        fields = extract_fields()
+
         if isinstance(chunk.content, str):
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=chunk.content,
-                )
-            ]
+            content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
+            content = [content_item]
         elif isinstance(chunk.content, list):
             # TODO: Add support for other types of content
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=item.text,
-                )
-                for item in chunk.content
-                if item.type == "text"
-            ]
+            content = []
+            for item in chunk.content:
+                if item.type == "text":
+                    content_item = VectorStoreContent(type="text", text=item.text, **fields)
+                    content.append(content_item)
         else:
             if chunk.content.type != "text":
                 raise ValueError(f"Unsupported content type: {chunk.content.type}")
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=chunk.content.text,
-                )
-            ]
+
+            content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields)
+            content = [content_item]
         return content
 
     async def openai_attach_file_to_vector_store(
```
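The effect of the include flags is easiest to check with the field extraction pulled out onto a stub chunk (`StubChunk` and this free-standing `extract_fields` are simplified stand-ins for the real models, for illustration only):

```python
from dataclasses import dataclass


@dataclass
class StubChunk:
    content: str
    embedding: list[float] | None = None
    chunk_metadata: dict | None = None
    metadata: dict | None = None


def extract_fields(chunk: StubChunk, include_embeddings: bool, include_metadata: bool) -> dict:
    # Mirrors the nested helper above: fields are withheld (None) unless requested.
    return {
        "embedding": chunk.embedding if include_embeddings else None,
        "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
        "metadata": chunk.metadata if include_metadata else None,
    }


chunk = StubChunk("hello", embedding=[0.1, 0.2], metadata={"filename": "a.txt"})
print(extract_fields(chunk, include_embeddings=False, include_metadata=True))
# {'embedding': None, 'chunk_metadata': None, 'metadata': {'filename': 'a.txt'}}
```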
```diff
@@ -820,13 +821,12 @@
                     message=str(e),
                 )
 
-        # Create OpenAI vector store file metadata
+        # Save vector store file to persistent storage AFTER insert_chunks
+        # so that chunks include the embeddings that were generated
        file_info = vector_store_file_object.model_dump(exclude={"last_error"})
         file_info["filename"] = file_response.filename if file_response else ""
 
-        # Save vector store file to persistent storage (provider-specific)
         dict_chunks = [c.model_dump() for c in chunks]
-        # This should be updated to include chunk_id
         await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
 
         # Update file_ids and file_counts in vector store metadata
```
```diff
@@ -921,21 +921,27 @@
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)
 
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
-            content.extend(self._chunk_to_vector_store_content(chunk))
+            content.extend(
+                self._chunk_to_vector_store_content(
+                    chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
+                )
+            )
         return VectorStoreFileContentResponse(
             object="vector_store.file_content.page",
             data=content,
             has_more=False,
             next_page=None,
         )
 
     async def openai_update_vector_store_file(
```
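One small detail above: the parameters are typed `bool | None`, but `_chunk_to_vector_store_content` takes strict bools, hence the `or False` coercion. A quick check of its behavior:

```python
for value in (None, False, True):
    print(repr(value), "->", value or False)
# None -> False
# False -> False
# True -> True
```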