From eb04731750688de26f0aba7291199f8c9b1521b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Thu, 12 Jun 2025 16:14:32 +0200
Subject: [PATCH 1/2] ci: fix external provider test (#2438)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
The test wasn't using the correct virtual environment: the workflow now
activates the environment created by the build step before starting the
server. This PR also increases the console width used for logs.
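Roughly, the handler change amounts to the sketch below (a minimal sketch
assuming the `rich` package; the comment on why the width matters is an
inference from this PR's grep change, not stated in the original):

```
import logging

from rich.console import Console
from rich.logging import RichHandler

class CustomRichHandler(RichHandler):
    def __init__(self, *args, **kwargs):
        # A wider console makes it less likely that Rich wraps long log
        # lines (such as the provider-load message the workflow greps for
        # in server.log) across multiple physical lines.
        kwargs["console"] = Console(width=150)
        super().__init__(*args, **kwargs)

logging.basicConfig(level=logging.INFO, handlers=[CustomRichHandler()])
logging.getLogger(__name__).info("Successfully loaded external provider remote::custom_ollama")
```
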
Signed-off-by: Sébastien Han
---
 .github/workflows/test-external-providers.yml | 10 ++++++----
 llama_stack/distribution/distribution.py      |  1 +
 llama_stack/log.py                            |  2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml
index 06ab7cf3c..cdf18fab7 100644
--- a/.github/workflows/test-external-providers.yml
+++ b/.github/workflows/test-external-providers.yml
@@ -45,20 +45,22 @@ jobs:
 
     - name: Build distro from config file
       run: |
-        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
+        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
 
     - name: Start Llama Stack server in background
       if: ${{ matrix.image-type }} == 'venv'
       env:
         INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
       run: |
-        uv run pip list
-        nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+        # Use the virtual environment created by the build step (name comes from build config)
+        source ci-test/bin/activate
+        uv pip list
+        nohup llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
 
     - name: Wait for Llama Stack server to be ready
       run: |
         for i in {1..30}; do
-          if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+          if ! grep -q "Successfully loaded external provider remote::custom_ollama" server.log; then
             echo "Waiting for Llama Stack server to load the provider..."
             sleep 1
           else
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py
index b860d15ab..e37b2c443 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@@ -180,6 +180,7 @@ def get_provider_registry(
                     if provider_type_key in ret[api]:
                         logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
                     ret[api][provider_type_key] = spec
+                    logger.info(f"Successfully loaded external provider {provider_type_key}")
                 except yaml.YAMLError as yaml_err:
                     logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
                     raise yaml_err
diff --git a/llama_stack/log.py b/llama_stack/log.py
index f4184710a..c14967f0a 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -115,7 +115,7 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
 
 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console(width=120)
+        kwargs["console"] = Console(width=150)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):

From 35c2817d0ae94ab8eda837a1f1b4eef0f9a6ae60 Mon Sep 17 00:00:00 2001
From: Ibrahim Haroon <99413953+Ibrahim-Haroon@users.noreply.github.com>
Date: Thu, 12 Jun 2025 11:23:59 -0400
Subject: [PATCH 2/2] fix(weaviate): handle case where distance is 0 by
 setting score to infinity (#2415)

# What does this PR do?
Fixes the Weaviate provider's `query_vector` function for the case where
the distance between the query embedding and a stored embedding is 0
(identical vectors). Instead of raising a `ZeroDivisionError`, the
`score` is now set to infinity, which represents maximum similarity.

Closes #2381

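As a sketch, the guarded score computation reduces to the following
(`distance_to_score` is a hypothetical helper name; the actual change
inlines this expression on `doc.metadata.distance`):

```
def distance_to_score(distance: float) -> float:
    # Weaviate distances are non-negative and 0 means identical vectors,
    # so map 0 to infinite similarity instead of dividing by zero.
    return 1.0 / distance if distance != 0 else float("inf")

assert distance_to_score(0.5) == 2.0           # smaller distance -> larger score
assert distance_to_score(0.0) == float("inf")  # identical vectors -> max similarity
```
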
## Test Plan
Check out this PR and execute the code below; it will no longer raise a
`ZeroDivisionError`:

```
from llama_stack_client import LlamaStackClient

base_url = "http://localhost:8321"
client = LlamaStackClient(base_url=base_url)

models = client.models.list()
embedding_model = next(m for m in models if m.model_type == "embedding").identifier
embedding_dimension = 384

_ = client.vector_dbs.register(
    vector_db_id="foo_db",
    embedding_model=embedding_model,
    embedding_dimension=embedding_dimension,
    provider_id="weaviate",
)

chunk = {
    "content": "foo",
    "mime_type": "text/plain",
    "metadata": {"document_id": "foo-id"},
}

client.vector_io.insert(vector_db_id="foo_db", chunks=[chunk])
client.vector_io.query(vector_db_id="foo_db", query="foo")
```
---
 .../remote/vector_io/weaviate/weaviate.py     |  2 +-
 tests/integration/vector_io/test_vector_io.py | 33 +++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index e6fe8ccd3..6f2027dad 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -76,7 +76,7 @@ class WeaviateIndex(EmbeddingIndex):
                 continue
 
             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance)
+            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index f1cac9701..f550cf666 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -154,3 +154,36 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     assert len(response.chunks) > 0
     assert response.chunks[0].metadata["document_id"] == "doc1"
     assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+    vector_db_id = "test_precomputed_embeddings_db"
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=384,
+    )
+
+    chunks_with_embeddings = [
+        Chunk(
+            content="duplicate",
+            metadata={"document_id": "doc1", "source": "precomputed"},
+            embedding=[0.1] * 384,
+        ),
+    ]
+
+    client_with_empty_registry.vector_io.insert(
+        vector_db_id=vector_db_id,
+        chunks=chunks_with_embeddings,
+    )
+
+    response = client_with_empty_registry.vector_io.query(
+        vector_db_id=vector_db_id,
+        query="duplicate",
+    )
+
+    # Verify the top result is the expected document
+    assert response is not None
+    assert len(response.chunks) > 0
+    assert response.chunks[0].metadata["document_id"] == "doc1"
+    assert response.chunks[0].metadata["source"] == "precomputed"