Merge branch 'main' into chroma

This commit is contained in:
kimbwook 2025-10-15 00:14:05 +09:00
commit 3f66f55771
No known key found for this signature in database
GPG key ID: 13B032C99CBD373A
137 changed files with 35682 additions and 1800 deletions

View file

@ -148,8 +148,6 @@ def test_openai_create_vector_store(
metadata={"purpose": "testing", "environment": "integration"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -177,8 +175,6 @@ def test_openai_list_vector_stores(
metadata={"type": "test"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
store2 = client.vector_stores.create(
@ -186,8 +182,6 @@ def test_openai_list_vector_stores(
metadata={"type": "test"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -222,8 +216,6 @@ def test_openai_retrieve_vector_store(
metadata={"purpose": "retrieval_test"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -251,8 +243,6 @@ def test_openai_update_vector_store(
metadata={"version": "1.0"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
time.sleep(1)
@ -284,8 +274,6 @@ def test_openai_delete_vector_store(
metadata={"purpose": "deletion_test"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -316,8 +304,6 @@ def test_openai_vector_store_search_empty(
metadata={"purpose": "search_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -348,8 +334,6 @@ def test_openai_vector_store_with_chunks(
metadata={"purpose": "chunks_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -414,8 +398,6 @@ def test_openai_vector_store_search_relevance(
metadata={"purpose": "relevance_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -459,8 +441,6 @@ def test_openai_vector_store_search_with_ranking_options(
metadata={"purpose": "ranking_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -470,8 +450,20 @@ def test_openai_vector_store_search_with_ranking_options(
chunks=sample_chunks,
)
# First search without threshold to determine reasonable threshold
initial_search = compat_client.vector_stores.search(
vector_store_id=vector_store.id,
query="machine learning and artificial intelligence",
max_num_results=3,
)
# Use a threshold that's lower than the lowest score to ensure we get results
if initial_search.data:
threshold = min(result.score for result in initial_search.data) * 0.9
else:
threshold = 0.01
# Search with ranking options
threshold = 0.1
search_response = compat_client.vector_stores.search(
vector_store_id=vector_store.id,
query="machine learning and artificial intelligence",
@ -502,8 +494,6 @@ def test_openai_vector_store_search_with_high_score_filter(
metadata={"purpose": "high_score_filtering"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -563,8 +553,6 @@ def test_openai_vector_store_search_with_max_num_results(
metadata={"purpose": "max_num_results_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -590,6 +578,7 @@ def test_openai_vector_store_attach_file(
):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
from llama_stack.apis.files import ExpiresAfter
compat_client = compat_client_with_empty_stores
@ -598,8 +587,6 @@ def test_openai_vector_store_attach_file(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -607,7 +594,11 @@ def test_openai_vector_store_attach_file(
test_content = b"The secret string is foobazbar."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "openai_test.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
# Attach the file to the vector store
file_attach_response = compat_client.vector_stores.files.create(
@ -649,13 +640,18 @@ def test_openai_vector_store_attach_files_on_creation(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create some files and attach them to the vector store
valid_file_ids = []
for i in range(3):
with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
valid_file_ids.append(file.id)
# include an invalid file ID so we can test failed status
@ -668,8 +664,6 @@ def test_openai_vector_store_attach_files_on_creation(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -709,14 +703,13 @@ def test_openai_vector_store_list_files(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -725,7 +718,11 @@ def test_openai_vector_store_list_files(
for i in range(3):
with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
@ -795,14 +792,13 @@ def test_openai_vector_store_retrieve_file_contents(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -812,7 +808,11 @@ def test_openai_vector_store_retrieve_file_contents(
attributes = {"foo": "bar"}
with BytesIO(test_content) as file_buffer:
file_buffer.name = file_name
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
# Attach the file to the vector store
file_attach_response = compat_client.vector_stores.files.create(
@ -821,8 +821,6 @@ def test_openai_vector_store_retrieve_file_contents(
attributes=attributes,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -853,14 +851,13 @@ def test_openai_vector_store_delete_file(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -869,7 +866,11 @@ def test_openai_vector_store_delete_file(
for i in range(3):
with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
@ -914,14 +915,13 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -929,7 +929,11 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
test_content = b"The secret string is foobazbar."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "openai_test.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
# Attach the file to the vector store
file_attach_response = compat_client.vector_stores.files.create(
@ -961,14 +965,13 @@ def test_openai_vector_store_update_file(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack.apis.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -976,7 +979,11 @@ def test_openai_vector_store_update_file(
test_content = b"This is a test file"
with BytesIO(test_content) as file_buffer:
file_buffer.name = "openai_test.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
# Attach the file to the vector store
file_attach_response = compat_client.vector_stores.files.create(
@ -1013,6 +1020,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
This test confirms that client.vector_stores.create() creates a unique ID
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
from llama_stack.apis.files import ExpiresAfter
compat_client = compat_client_with_empty_stores
@ -1021,15 +1029,17 @@ def test_create_vector_store_files_duplicate_vector_store_name(
for i in range(3):
with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400), # 24 hours
)
file_ids.append(file.id)
vector_store = compat_client.vector_stores.create(
name="test_store_with_files",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
assert vector_store.file_counts.completed == 0
@ -1042,8 +1052,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
name="test_store_with_files",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1055,8 +1063,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
file_id=file_ids[0],
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
assert created_file.status == "completed"
@ -1067,8 +1073,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
file_id=file_ids[1],
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
assert created_file_from_non_deleted_vector_store.status == "completed"
@ -1089,8 +1093,6 @@ def test_openai_vector_store_search_modes(
metadata={"purpose": "search_mode_testing"},
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1122,8 +1124,6 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
name="batch_test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1141,8 +1141,6 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1189,8 +1187,6 @@ def test_openai_vector_store_file_batch_list_files(
name="batch_list_test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1208,8 +1204,6 @@ def test_openai_vector_store_file_batch_list_files(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1286,8 +1280,6 @@ def test_openai_vector_store_file_batch_cancel(
name="batch_cancel_test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1305,8 +1297,6 @@ def test_openai_vector_store_file_batch_cancel(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1345,8 +1335,6 @@ def test_openai_vector_store_file_batch_retrieve_contents(
name="batch_contents_test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1369,8 +1357,6 @@ def test_openai_vector_store_file_batch_retrieve_contents(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1422,8 +1408,6 @@ def test_openai_vector_store_file_batch_error_handling(
name="batch_error_test_store",
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -1435,8 +1419,6 @@ def test_openai_vector_store_file_batch_error_handling(
file_ids=file_ids,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)

View file

@ -52,8 +52,6 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embe
name=vector_db_name,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -73,8 +71,6 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
name=vector_db_name,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -110,8 +106,6 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding
name=vector_db_name,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -152,8 +146,6 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
name=vector_db_name,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -202,8 +194,6 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
name=vector_db_name,
extra_body={
"embedding_model": embedding_model_id,
"embedding_dimension": embedding_dimension,
"provider_id": "my_provider",
},
)
@ -234,3 +224,35 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
assert len(response.chunks) > 0
assert response.chunks[0].metadata["document_id"] == "doc1"
assert response.chunks[0].metadata["source"] == "precomputed"
def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id):
vs = client_with_empty_registry.vector_stores.create(
name="test_auto_extract", extra_body={"embedding_model": embedding_model_id}
)
assert vs.id is not None
def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id):
providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
if len(providers) != 1:
pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
vs = client_with_empty_registry.vector_stores.create(
name="test_auto_provider", extra_body={"embedding_model": embedding_model_id}
)
assert vs.id is not None
def test_provider_id_override(client_with_empty_registry, embedding_model_id):
providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
if len(providers) != 1:
pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
provider_id = providers[0].provider_id
vs = client_with_empty_registry.vector_stores.create(
name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id}
)
assert vs.id is not None
assert vs.metadata.get("provider_id") == provider_id