feat: allow returning embeddings and metadata from /vector_stores/ methods; disallow changing Provider ID (#4046)

# What does this PR do?

- Updates `/vector_stores/{vector_store_id}/files/{file_id}/content` to
allow returning `embeddings` and `metadata` using the `extra_query`
    -  Updates the UI accordingly to display them.

- Update UI to support CRUD operations in the Vector Stores section and
adds a new modal exposing the functionality.

- Updates Vector Store update to fail if a user tries to update Provider
ID (which doesn't make sense to allow)

```python
In  [1]: client.vector_stores.files.content(
    vector_store_id=vector_store.id, 
    file_id=file.id, 
    extra_query={"include_embeddings": True, "include_metadata": True}
)
Out [1]: FileContentResponse(attributes={}, content=[Content(text='This is a test document to check if embeddings are generated properly.\n', type='text', embedding=[0.33760684728622437, ...,], chunk_metadata={'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'source': None, 'created_timestamp': 1762053437, 'updated_timestamp': 1762053437, 'chunk_window': '0-13', 'chunk_tokenizer': 'DEFAULT_TIKTOKEN_TOKENIZER', 'chunk_embedding_model': 'sentence-transformers/nomic
-ai/nomic-embed-text-v1.5', 'chunk_embedding_dimension': 768, 'content_token_count': 13, 'metadata_token_count': 9}, metadata={'filename': 'test-embedding.txt', 'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'token_count': 13, 'metadata_token_count': 9})], file_id='file-27291dbc679642ac94ffac6d2810c339', filename='test-embedding.txt')
```

Screenshots of UI are displayed below:

### List Vector Store with Added "Create New Vector Store"
<img width="1912" height="491" alt="Screenshot 2025-11-06 at 10 47
25 PM"
src="https://github.com/user-attachments/assets/a3a3ddd9-758d-4005-ac9c-5047f03916f3"
/>

### Create New Vector Store
<img width="1918" height="1048" alt="Screenshot 2025-11-06 at 10 47
49 PM"
src="https://github.com/user-attachments/assets/b4dc0d31-696f-4e68-b109-27915090f158"
/>

### Edit Vector Store
<img width="1916" height="1355" alt="Screenshot 2025-11-06 at 10 48
32 PM"
src="https://github.com/user-attachments/assets/ec879c63-4cf7-489f-bb1e-57ccc7931414"
/>


### Vector Store Files Contents page (with Embeddings)
<img width="1914" height="849" alt="Screenshot 2025-11-06 at 11 54
32 PM"
src="https://github.com/user-attachments/assets/3095520d-0e90-41f7-83bd-652f6c3fbf27"
/>

### Vector Store Files Contents Details page (with Embeddings)
<img width="1916" height="1221" alt="Screenshot 2025-11-06 at 11 55
00 PM"
src="https://github.com/user-attachments/assets/e71dbdc5-5b49-472b-a43a-5785f58d196c"
/>

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->

## Test Plan
Tests added for Middleware extension and Provider failures.

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Arceo 2025-11-12 12:59:48 -05:00 committed by GitHub
parent 37853ca558
commit eb3f9ac278
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1161 additions and 125 deletions

View file

@ -11,6 +11,7 @@ import pytest
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.files import ExpiresAfter
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata(
assert "metadata_config_store" in store_names
assert "consistent_config_store" in store_names
@vector_provider_wrapper
def test_openai_vector_store_file_contents_with_extra_query(
compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
):
"""Test that vector store file contents endpoint supports extra_query parameter."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_extra_query_store",
extra_body={
"embedding_model": embedding_model_id,
"provider_id": vector_io_provider_id,
},
)
# Create and attach a file
test_content = b"This is test content for extra_query validation."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "test_extra_query.txt"
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)
file_attach_response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
file_id=file.id,
extra_body={"embedding_model": embedding_model_id},
)
assert file_attach_response.status == "completed"
# Wait for processing
time.sleep(2)
# Test that extra_query parameter is accepted and processed
content_with_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
extra_query={"include_embeddings": True, "include_metadata": True},
)
# Test without extra_query for comparison
content_without_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
)
# Validate that both calls succeed
assert content_with_extra_query is not None
assert content_without_extra_query is not None
assert len(content_with_extra_query.data) > 0
assert len(content_without_extra_query.data) > 0
# Validate that extra_query parameter is processed correctly
# Both should have the embedding/metadata fields available (may be None based on flags)
first_chunk_with_flags = content_with_extra_query.data[0]
first_chunk_without_flags = content_without_extra_query.data[0]
# The key validation: extra_query fields are present in the response
# Handle both dict and object responses (different clients may return different formats)
def has_field(obj, field):
if isinstance(obj, dict):
return field in obj
else:
return hasattr(obj, field)
# Validate that all expected fields are present in both responses
expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
for field in expected_fields:
assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"
# Validate content is the same
def get_field(obj, field):
if isinstance(obj, dict):
return obj[field]
else:
return getattr(obj, field)
assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")
with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
without_flags_embedding = get_field(first_chunk_without_flags, "embedding")
# Validate that embeddings are included when requested and excluded when not requested
assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False"