Merge branch 'main' into chroma

2025-12-05 18:27:22 +00:00 · 2025-10-15 00:14:05 +09:00 · 2025-10-15 00:14:05 +09:00 · 3f66f55771
commit 3f66f55771
parent b52299d789 007efa6eb5
137 changed files with 35682 additions and 1800 deletions
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@ -148,8 +148,6 @@ def test_openai_create_vector_store(
        metadata={"purpose": "testing", "environment": "integration"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -177,8 +175,6 @@ def test_openai_list_vector_stores(
        metadata={"type": "test"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )
    store2 = client.vector_stores.create(
@ -186,8 +182,6 @@ def test_openai_list_vector_stores(
        metadata={"type": "test"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -222,8 +216,6 @@ def test_openai_retrieve_vector_store(
        metadata={"purpose": "retrieval_test"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -251,8 +243,6 @@ def test_openai_update_vector_store(
        metadata={"version": "1.0"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )
    time.sleep(1)
@ -284,8 +274,6 @@ def test_openai_delete_vector_store(
        metadata={"purpose": "deletion_test"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -316,8 +304,6 @@ def test_openai_vector_store_search_empty(
        metadata={"purpose": "search_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -348,8 +334,6 @@ def test_openai_vector_store_with_chunks(
        metadata={"purpose": "chunks_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -414,8 +398,6 @@ def test_openai_vector_store_search_relevance(
        metadata={"purpose": "relevance_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -459,8 +441,6 @@ def test_openai_vector_store_search_with_ranking_options(
        metadata={"purpose": "ranking_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -470,8 +450,20 @@ def test_openai_vector_store_search_with_ranking_options(
        chunks=sample_chunks,
    )

+    # First search without threshold to determine reasonable threshold
+    initial_search = compat_client.vector_stores.search(
+        vector_store_id=vector_store.id,
+        query="machine learning and artificial intelligence",
+        max_num_results=3,
+    )
+
+    # Use a threshold that's lower than the lowest score to ensure we get results
+    if initial_search.data:
+        threshold = min(result.score for result in initial_search.data) * 0.9
+    else:
+        threshold = 0.01
+
    # Search with ranking options
-    threshold = 0.1
    search_response = compat_client.vector_stores.search(
        vector_store_id=vector_store.id,
        query="machine learning and artificial intelligence",
@ -502,8 +494,6 @@ def test_openai_vector_store_search_with_high_score_filter(
        metadata={"purpose": "high_score_filtering"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -563,8 +553,6 @@ def test_openai_vector_store_search_with_max_num_results(
        metadata={"purpose": "max_num_results_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -590,6 +578,7 @@ def test_openai_vector_store_attach_file(
 ):
    """Test OpenAI vector store attach file."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+    from llama_stack.apis.files import ExpiresAfter

    compat_client = compat_client_with_empty_stores

@ -598,8 +587,6 @@ def test_openai_vector_store_attach_file(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -607,7 +594,11 @@ def test_openai_vector_store_attach_file(
    test_content = b"The secret string is foobazbar."
    with BytesIO(test_content) as file_buffer:
        file_buffer.name = "openai_test.txt"
-        file = compat_client.files.create(file=file_buffer, purpose="assistants")
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+        )

    # Attach the file to the vector store
    file_attach_response = compat_client.vector_stores.files.create(
@ -649,13 +640,18 @@ def test_openai_vector_store_attach_files_on_creation(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create some files and attach them to the vector store
    valid_file_ids = []
    for i in range(3):
        with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
            file_buffer.name = f"openai_test_{i}.txt"
-            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file = compat_client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+            )
        valid_file_ids.append(file.id)

    # include an invalid file ID so we can test failed status
@ -668,8 +664,6 @@ def test_openai_vector_store_attach_files_on_creation(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -709,14 +703,13 @@ def test_openai_vector_store_list_files(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -725,7 +718,11 @@ def test_openai_vector_store_list_files(
    for i in range(3):
        with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
            file_buffer.name = f"openai_test_{i}.txt"
-            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file = compat_client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+            )

        response = compat_client.vector_stores.files.create(
            vector_store_id=vector_store.id,
@ -795,14 +792,13 @@ def test_openai_vector_store_retrieve_file_contents(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -812,7 +808,11 @@ def test_openai_vector_store_retrieve_file_contents(
    attributes = {"foo": "bar"}
    with BytesIO(test_content) as file_buffer:
        file_buffer.name = file_name
-        file = compat_client.files.create(file=file_buffer, purpose="assistants")
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+        )

    # Attach the file to the vector store
    file_attach_response = compat_client.vector_stores.files.create(
@ -821,8 +821,6 @@ def test_openai_vector_store_retrieve_file_contents(
        attributes=attributes,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -853,14 +851,13 @@ def test_openai_vector_store_delete_file(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -869,7 +866,11 @@ def test_openai_vector_store_delete_file(
    for i in range(3):
        with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
            file_buffer.name = f"openai_test_{i}.txt"
-            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file = compat_client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+            )

        compat_client.vector_stores.files.create(
            vector_store_id=vector_store.id,
@ -914,14 +915,13 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -929,7 +929,11 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
    test_content = b"The secret string is foobazbar."
    with BytesIO(test_content) as file_buffer:
        file_buffer.name = "openai_test.txt"
-        file = compat_client.files.create(file=file_buffer, purpose="assistants")
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+        )

    # Attach the file to the vector store
    file_attach_response = compat_client.vector_stores.files.create(
@ -961,14 +965,13 @@ def test_openai_vector_store_update_file(
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

    compat_client = compat_client_with_empty_stores
+    from llama_stack.apis.files import ExpiresAfter

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -976,7 +979,11 @@ def test_openai_vector_store_update_file(
    test_content = b"This is a test file"
    with BytesIO(test_content) as file_buffer:
        file_buffer.name = "openai_test.txt"
-        file = compat_client.files.create(file=file_buffer, purpose="assistants")
+        file = compat_client.files.create(
+            file=file_buffer,
+            purpose="assistants",
+            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+        )

    # Attach the file to the vector store
    file_attach_response = compat_client.vector_stores.files.create(
@ -1013,6 +1020,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
    This test confirms that client.vector_stores.create() creates a unique ID
    """
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+    from llama_stack.apis.files import ExpiresAfter

    compat_client = compat_client_with_empty_stores

@ -1021,15 +1029,17 @@ def test_create_vector_store_files_duplicate_vector_store_name(
    for i in range(3):
        with BytesIO(f"This is a test file {i}".encode()) as file_buffer:
            file_buffer.name = f"openai_test_{i}.txt"
-            file = compat_client.files.create(file=file_buffer, purpose="assistants")
+            file = compat_client.files.create(
+                file=file_buffer,
+                purpose="assistants",
+                expires_after=ExpiresAfter(anchor="created_at", seconds=86400),  # 24 hours
+            )
        file_ids.append(file.id)

    vector_store = compat_client.vector_stores.create(
        name="test_store_with_files",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )
    assert vector_store.file_counts.completed == 0
@ -1042,8 +1052,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
        name="test_store_with_files",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1055,8 +1063,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
        file_id=file_ids[0],
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )
    assert created_file.status == "completed"
@ -1067,8 +1073,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(
        file_id=file_ids[1],
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )
    assert created_file_from_non_deleted_vector_store.status == "completed"
@ -1089,8 +1093,6 @@ def test_openai_vector_store_search_modes(
        metadata={"purpose": "search_mode_testing"},
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1122,8 +1124,6 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
        name="batch_test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1141,8 +1141,6 @@ def test_openai_vector_store_file_batch_create_and_retrieve(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1189,8 +1187,6 @@ def test_openai_vector_store_file_batch_list_files(
        name="batch_list_test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1208,8 +1204,6 @@ def test_openai_vector_store_file_batch_list_files(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1286,8 +1280,6 @@ def test_openai_vector_store_file_batch_cancel(
        name="batch_cancel_test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1305,8 +1297,6 @@ def test_openai_vector_store_file_batch_cancel(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1345,8 +1335,6 @@ def test_openai_vector_store_file_batch_retrieve_contents(
        name="batch_contents_test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1369,8 +1357,6 @@ def test_openai_vector_store_file_batch_retrieve_contents(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1422,8 +1408,6 @@ def test_openai_vector_store_file_batch_error_handling(
        name="batch_error_test_store",
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -1435,8 +1419,6 @@ def test_openai_vector_store_file_batch_error_handling(
        file_ids=file_ids,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@ -52,8 +52,6 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embe
        name=vector_db_name,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -73,8 +71,6 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
        name=vector_db_name,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -110,8 +106,6 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding
        name=vector_db_name,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -152,8 +146,6 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
        name=vector_db_name,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -202,8 +194,6 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
        name=vector_db_name,
        extra_body={
            "embedding_model": embedding_model_id,
-            "embedding_dimension": embedding_dimension,
-            "provider_id": "my_provider",
        },
    )

@ -234,3 +224,35 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
    assert len(response.chunks) > 0
    assert response.chunks[0].metadata["document_id"] == "doc1"
    assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_auto_extract_embedding_dimension(client_with_empty_registry, embedding_model_id):
+    vs = client_with_empty_registry.vector_stores.create(
+        name="test_auto_extract", extra_body={"embedding_model": embedding_model_id}
+    )
+    assert vs.id is not None
+
+
+def test_provider_auto_selection_single_provider(client_with_empty_registry, embedding_model_id):
+    providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
+    if len(providers) != 1:
+        pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
+
+    vs = client_with_empty_registry.vector_stores.create(
+        name="test_auto_provider", extra_body={"embedding_model": embedding_model_id}
+    )
+    assert vs.id is not None
+
+
+def test_provider_id_override(client_with_empty_registry, embedding_model_id):
+    providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
+    if len(providers) != 1:
+        pytest.skip(f"Test requires exactly one vector_io provider, found {len(providers)}")
+
+    provider_id = providers[0].provider_id
+
+    vs = client_with_empty_registry.vector_stores.create(
+        name="test_provider_override", extra_body={"embedding_model": embedding_model_id, "provider_id": provider_id}
+    )
+    assert vs.id is not None
+    assert vs.metadata.get("provider_id") == provider_id