From 6039d922c0c7b93d64474e6a04896b6d555d57f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?= <seb@redhat.com>
Date: Thu, 19 Jun 2025 09:59:04 +0200
Subject: [PATCH] fix: allow running vector tests with embedding dimension
 (#2467)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

Do not hard-code 384 as the embedding dimension in the integration tests;
use the one provided by the test run (`--embedding-dimension`, which now
defaults to 384).

## Test Plan

```
 pytest -s -vvv tests/integration/vector_io/test_vector_io.py --stack-config=http://localhost:8321 \
    -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
    --text-model="meta-llama/Llama-3.2-3B-Instruct" \
    --embedding-model=granite-embedding-125m --embedding-dimension=768
Uninstalled 1 package in 16ms
Installed 1 package in 11ms
INFO     2025-06-18 10:52:03,314 tests.integration.conftest:59 tests: Setting DISABLE_CODE_SANDBOX=1 for macOS
/Users/leseb/Documents/AI/llama-stack/.venv/lib/python3.10/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset.
The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session"

  warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET))
================================================= test session starts =================================================
platform darwin -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.10.16', 'Platform': 'macOS-15.5-arm64-arm-64bit', 'Packages': {'pytest': '8.3.4', 'pluggy': '1.5.0'}, 'Plugins': {'cov': '6.0.0', 'html': '4.1.1', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'asyncio': '0.25.3', 'anyio': '4.8.0', 'nbval': '0.11.0'}}
rootdir: /Users/leseb/Documents/AI/llama-stack
configfile: pyproject.toml
plugins: cov-6.0.0, html-4.1.1, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, asyncio-0.25.3, anyio-4.8.0, nbval-0.11.0
asyncio: mode=strict, asyncio_default_fixture_loop_scope=None
collected 8 items

tests/integration/vector_io/test_vector_io.py::test_vector_db_retrieve[emb=granite-embedding-125m:dim=768] PASSED
tests/integration/vector_io/test_vector_io.py::test_vector_db_register[emb=granite-embedding-125m:dim=768] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case0] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case1] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case2] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case3] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=granite-embedding-125m:dim=768-test_case4] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks_with_precomputed_embeddings[emb=granite-embedding-125m:dim=768] PASSED

================================================== 8 passed in 5.50s ==================================================
```

Signed-off-by: Sébastien Han <seb@redhat.com>
---
 tests/integration/conftest.py                 |  1 +
 .../integration/tool_runtime/test_rag_tool.py | 16 +++++++-----
 tests/integration/vector_io/test_vector_io.py | 26 ++++++++++---------
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ec5918268..fa96688c0 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -95,6 +95,7 @@ def pytest_addoption(parser):
     parser.addoption(
         "--embedding-dimension",
         type=int,
+        default=384,
         help="Output dimensionality of the embedding model to use for testing. Default: 384",
     )
     parser.addoption(
diff --git a/tests/integration/tool_runtime/test_rag_tool.py b/tests/integration/tool_runtime/test_rag_tool.py
index 46f4f8768..2affe2a2d 100644
--- a/tests/integration/tool_runtime/test_rag_tool.py
+++ b/tests/integration/tool_runtime/test_rag_tool.py
@@ -63,12 +63,14 @@ def assert_valid_text_response(response):
     assert all(isinstance(chunk.text, str) for chunk in response.content)
 
 
-def test_vector_db_insert_inline_and_query(client_with_empty_registry, sample_documents, embedding_model_id):
+def test_vector_db_insert_inline_and_query(
+    client_with_empty_registry, sample_documents, embedding_model_id, embedding_dimension
+):
     vector_db_id = "test_vector_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     client_with_empty_registry.tool_runtime.rag_tool.insert(
@@ -116,7 +118,9 @@ def test_vector_db_insert_inline_and_query(client_with_empty_registry, sample_do
     assert all(score >= 0.01 for score in response4.scores)
 
 
-def test_vector_db_insert_from_url_and_query(client_with_empty_registry, sample_documents, embedding_model_id):
+def test_vector_db_insert_from_url_and_query(
+    client_with_empty_registry, sample_documents, embedding_model_id, embedding_dimension
+):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     assert len(providers) > 0
 
@@ -125,7 +129,7 @@ def test_vector_db_insert_from_url_and_query(client_with_empty_registry, sample_
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     # list to check memory bank is successfully registered
@@ -170,7 +174,7 @@ def test_vector_db_insert_from_url_and_query(client_with_empty_registry, sample_
     assert any("llama2" in chunk.content.lower() for chunk in response2.chunks)
 
 
-def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id):
+def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
     providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
     assert len(providers) > 0
 
@@ -179,7 +183,7 @@ def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_i
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index f550cf666..95fcb8db5 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -46,13 +46,13 @@ def client_with_empty_registry(client_with_models):
     clear_registry()
 
 
-def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id):
+def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
     # Register a memory bank first
     vector_db_id = "test_vector_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     # Retrieve the memory bank and validate its properties
@@ -63,12 +63,12 @@ def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id):
     assert response.provider_resource_id == vector_db_id
 
 
-def test_vector_db_register(client_with_empty_registry, embedding_model_id):
+def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
     vector_db_id = "test_vector_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     vector_dbs_after_register = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
@@ -90,12 +90,12 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id):
         ("How does machine learning improve over time?", "doc2"),
     ],
 )
-def test_insert_chunks(client_with_empty_registry, embedding_model_id, sample_chunks, test_case):
+def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
     vector_db_id = "test_vector_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     client_with_empty_registry.vector_io.insert(
@@ -122,19 +122,19 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, sample_ch
     assert top_match.metadata["document_id"] == expected_doc_id, f"Query '{query}' should match {expected_doc_id}"
 
 
-def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id):
+def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, embedding_model_id, embedding_dimension):
     vector_db_id = "test_precomputed_embeddings_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     chunks_with_embeddings = [
         Chunk(
             content="This is a test chunk with precomputed embedding.",
             metadata={"document_id": "doc1", "source": "precomputed"},
-            embedding=[0.1] * 384,
+            embedding=[0.1] * int(embedding_dimension),
         ),
     ]
 
@@ -156,19 +156,21 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     assert response.chunks[0].metadata["source"] == "precomputed"
 
 
-def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
+    client_with_empty_registry, embedding_model_id, embedding_dimension
+):
     vector_db_id = "test_precomputed_embeddings_db"
     client_with_empty_registry.vector_dbs.register(
         vector_db_id=vector_db_id,
         embedding_model=embedding_model_id,
-        embedding_dimension=384,
+        embedding_dimension=embedding_dimension,
     )
 
     chunks_with_embeddings = [
         Chunk(
             content="duplicate",
             metadata={"document_id": "doc1", "source": "precomputed"},
-            embedding=[0.1] * 384,
+            embedding=[0.1] * int(embedding_dimension),
         ),
     ]