diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 5751cca2d..2b576a1a9 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -13756,6 +13756,24 @@
"type": "object",
"title": "Response"
},
+ "VectorStoreContent": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "text"
+ },
+ "text": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "text"
+ ],
+ "title": "VectorStoreContent"
+ },
"VectorStoreFileContentsResponse": {
"type": "object",
"properties": {
@@ -13793,7 +13811,7 @@
"content": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/InterleavedContentItem"
+ "$ref": "#/components/schemas/VectorStoreContent"
}
}
},
@@ -13879,24 +13897,6 @@
],
"title": "OpenaiSearchVectorStoreRequest"
},
- "VectorStoreContent": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "text"
- },
- "text": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "text"
- ],
- "title": "VectorStoreContent"
- },
"VectorStoreSearchResponse": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 79e9285b5..160193e6a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -9616,6 +9616,19 @@ components:
Response:
type: object
title: Response
+ VectorStoreContent:
+ type: object
+ properties:
+ type:
+ type: string
+ const: text
+ text:
+ type: string
+ additionalProperties: false
+ required:
+ - type
+ - text
+ title: VectorStoreContent
VectorStoreFileContentsResponse:
type: object
properties:
@@ -9636,7 +9649,7 @@ components:
content:
type: array
items:
- $ref: '#/components/schemas/InterleavedContentItem'
+ $ref: '#/components/schemas/VectorStoreContent'
additionalProperties: false
required:
- file_id
@@ -9693,19 +9706,6 @@ components:
required:
- query
title: OpenaiSearchVectorStoreRequest
- VectorStoreContent:
- type: object
- properties:
- type:
- type: string
- const: text
- text:
- type: string
- additionalProperties: false
- required:
- - type
- - text
- title: VectorStoreContent
VectorStoreSearchResponse:
type: object
properties:
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index ab5b3e567..6a674356d 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -12,7 +12,6 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
-from llama_stack.apis.common.content_types import InterleavedContentItem
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -208,7 +207,7 @@ class VectorStoreFileContentsResponse(BaseModel):
file_id: str
filename: str
attributes: dict[str, Any]
- content: list[InterleavedContentItem]
+ content: list[VectorStoreContent]
@json_schema_type
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 32bcccd97..2602acd4f 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -12,7 +12,6 @@ import uuid
from abc import ABC, abstractmethod
from typing import Any
-from llama_stack.apis.common.content_types import InterleavedContentItem, TextContentItem
from llama_stack.apis.files import Files
from llama_stack.apis.files.files import OpenAIFileObject
from llama_stack.apis.vector_dbs import VectorDB
@@ -386,33 +385,7 @@ class OpenAIVectorStoreMixin(ABC):
if not self._matches_filters(chunk.metadata, filters):
continue
- # content is InterleavedContent
- if isinstance(chunk.content, str):
- content = [
- VectorStoreContent(
- type="text",
- text=chunk.content,
- )
- ]
- elif isinstance(chunk.content, list):
- # TODO: Add support for other types of content
- content = [
- VectorStoreContent(
- type="text",
- text=item.text,
- )
- for item in chunk.content
- if item.type == "text"
- ]
- else:
- if chunk.content.type != "text":
- raise ValueError(f"Unsupported content type: {chunk.content.type}")
- content = [
- VectorStoreContent(
- type="text",
- text=chunk.content.text,
- )
- ]
+ content = self._chunk_to_vector_store_content(chunk)
response_data_item = VectorStoreSearchResponse(
file_id=chunk.metadata.get("file_id", ""),
@@ -488,6 +461,36 @@ class OpenAIVectorStoreMixin(ABC):
# Unknown filter type, default to no match
raise ValueError(f"Unsupported filter type: {filter_type}")
+ def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
+ # content is InterleavedContent
+ if isinstance(chunk.content, str):
+ content = [
+ VectorStoreContent(
+ type="text",
+ text=chunk.content,
+ )
+ ]
+ elif isinstance(chunk.content, list):
+ # TODO: Add support for other types of content
+ content = [
+ VectorStoreContent(
+ type="text",
+ text=item.text,
+ )
+ for item in chunk.content
+ if item.type == "text"
+ ]
+ else:
+ if chunk.content.type != "text":
+ raise ValueError(f"Unsupported content type: {chunk.content.type}")
+ content = [
+ VectorStoreContent(
+ type="text",
+ text=chunk.content.text,
+ )
+ ]
+ return content
+
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
@@ -634,20 +637,14 @@ class OpenAIVectorStoreMixin(ABC):
file_info = await self._load_openai_vector_store_file(vector_store_id, file_id)
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks]
- contents: list[InterleavedContentItem] = []
+ content = []
for chunk in chunks:
- content = chunk.content
- if isinstance(content, str):
- contents.append(TextContentItem(text=content))
- elif isinstance(content, InterleavedContentItem):
- contents.append(content)
- else:
- contents.extend(contents)
+ content.extend(self._chunk_to_vector_store_content(chunk))
return VectorStoreFileContentsResponse(
file_id=file_id,
filename=file_info.get("filename", ""),
attributes=file_info.get("attributes", {}),
- content=contents,
+ content=content,
)
async def openai_update_vector_store_file(
@@ -684,6 +681,10 @@ class OpenAIVectorStoreMixin(ABC):
await self._delete_openai_vector_store_file_from_storage(vector_store_id, file_id)
# TODO: We need to actually delete the embeddings from the underlying vector store...
+    # Also remove the xfail marker from the corresponding integration test
+ #
+ # test_openai_vector_store_delete_file_removes_from_vector_store in
+ # tests/integration/vector_io/test_openai_vector_stores.py
# Update in-memory cache
store_info["file_ids"].remove(file_id)
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index e7eccf46d..0440cd21c 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -440,7 +440,7 @@ def test_openai_vector_store_search_with_max_num_results(
assert len(search_response.data) == 2
-def test_openai_vector_store_attach_file_response_attributes(compat_client_with_empty_stores, client_with_models):
+def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
@@ -453,7 +453,7 @@ def test_openai_vector_store_attach_file_response_attributes(compat_client_with_
vector_store = compat_client.vector_stores.create(name="test_store")
# Create a file
- test_content = b"This is a test file"
+ test_content = b"The secret string is foobazbar."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "openai_test.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
@@ -480,6 +480,16 @@ def test_openai_vector_store_attach_file_response_attributes(compat_client_with_
assert updated_vector_store.file_counts.failed == 0
assert updated_vector_store.file_counts.in_progress == 0
+ # Search using OpenAI API to confirm our file attached
+ search_response = compat_client.vector_stores.search(
+ vector_store_id=vector_store.id, query="What is the secret string?", max_num_results=1
+ )
+ assert search_response is not None
+ assert len(search_response.data) > 0
+ top_result = search_response.data[0]
+ top_content = top_result.content[0].text
+ assert "foobazbar" in top_content.lower()
+
def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store attach files on creation."""
@@ -689,6 +699,49 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
assert updated_vector_store.file_counts.in_progress == 0
+# TODO: Remove this xfail once we have a way to remove embeddings from vector store
+@pytest.mark.xfail(reason="Vector Store Files delete doesn't remove embeddings from vector store", strict=True)
+def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
+ """Test OpenAI vector store delete file removes from vector store."""
+ skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+
+ if isinstance(compat_client_with_empty_stores, LlamaStackClient):
+ pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
+
+ compat_client = compat_client_with_empty_stores
+
+ # Create a vector store
+ vector_store = compat_client.vector_stores.create(name="test_store")
+
+ # Create a file
+ test_content = b"The secret string is foobazbar."
+ with BytesIO(test_content) as file_buffer:
+ file_buffer.name = "openai_test.txt"
+ file = compat_client.files.create(file=file_buffer, purpose="assistants")
+
+ # Attach the file to the vector store
+ file_attach_response = compat_client.vector_stores.files.create(
+ vector_store_id=vector_store.id,
+ file_id=file.id,
+ )
+ assert file_attach_response.status == "completed"
+
+ # Search using OpenAI API to confirm our file attached
+ search_response = compat_client.vector_stores.search(
+ vector_store_id=vector_store.id, query="What is the secret string?", max_num_results=1
+ )
+ assert "foobazbar" in search_response.data[0].content[0].text.lower()
+
+ # Delete the file
+ compat_client.vector_stores.files.delete(vector_store_id=vector_store.id, file_id=file.id)
+
+ # Search using OpenAI API to confirm our file deleted
+ search_response = compat_client.vector_stores.search(
+ vector_store_id=vector_store.id, query="What is the secret string?", max_num_results=1
+ )
+ assert not search_response.data
+
+
def test_openai_vector_store_update_file(compat_client_with_empty_stores, client_with_models):
"""Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)