From eb04731750688de26f0aba7291199f8c9b1521b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Thu, 12 Jun 2025 16:14:32 +0200
Subject: [PATCH 1/2] ci: fix external provider test (#2438)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
The test wasn't using the correct virtual environment: the workflow now
activates the environment created by the build step before starting the
server. This PR also increases the console width used for logs.
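Roughly, the handler change amounts to the sketch below (a minimal sketch
assuming the `rich` package; the comment on why the width matters is an
inference from this PR's grep change, not stated in the original):

```
import logging

from rich.console import Console
from rich.logging import RichHandler

class CustomRichHandler(RichHandler):
    def __init__(self, *args, **kwargs):
        # A wider console makes it less likely that Rich wraps long log
        # lines (such as the provider-load message the workflow greps for
        # in server.log) across multiple physical lines.
        kwargs["console"] = Console(width=150)
        super().__init__(*args, **kwargs)

logging.basicConfig(level=logging.INFO, handlers=[CustomRichHandler()])
logging.getLogger(__name__).info("Successfully loaded external provider remote::custom_ollama")
```
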
Signed-off-by: Sébastien Han
---
 .github/workflows/test-external-providers.yml | 10 ++++++----
 llama_stack/distribution/distribution.py      |  1 +
 llama_stack/log.py                            |  2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml
index 06ab7cf3c..cdf18fab7 100644
--- a/.github/workflows/test-external-providers.yml
+++ b/.github/workflows/test-external-providers.yml
@@ -45,20 +45,22 @@ jobs:
 
     - name: Build distro from config file
       run: |
-        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
+        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
 
     - name: Start Llama Stack server in background
       if: ${{ matrix.image-type }} == 'venv'
       env:
         INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
       run: |
-        uv run pip list
-        nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+        # Use the virtual environment created by the build step (name comes from build config)
+        source ci-test/bin/activate
+        uv pip list
+        nohup llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
 
     - name: Wait for Llama Stack server to be ready
       run: |
         for i in {1..30}; do
-          if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+          if ! grep -q "Successfully loaded external provider remote::custom_ollama" server.log; then
             echo "Waiting for Llama Stack server to load the provider..."
             sleep 1
           else
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py
index b860d15ab..e37b2c443 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@@ -180,6 +180,7 @@ def get_provider_registry(
                     if provider_type_key in ret[api]:
                         logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
                     ret[api][provider_type_key] = spec
+                    logger.info(f"Successfully loaded external provider {provider_type_key}")
                 except yaml.YAMLError as yaml_err:
                     logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
                     raise yaml_err
diff --git a/llama_stack/log.py b/llama_stack/log.py
index f4184710a..c14967f0a 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -115,7 +115,7 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
 
 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console(width=120)
+        kwargs["console"] = Console(width=150)
         super().__init__(*args, **kwargs)
 
     def emit(self, record):

From 35c2817d0ae94ab8eda837a1f1b4eef0f9a6ae60 Mon Sep 17 00:00:00 2001
From: Ibrahim Haroon <99413953+Ibrahim-Haroon@users.noreply.github.com>
Date: Thu, 12 Jun 2025 11:23:59 -0400
Subject: [PATCH 2/2] fix(weaviate): handle case where distance is 0 by
 setting score to infinity (#2415)

# What does this PR do?
Fixes the Weaviate provider's `query_vector` function for the case where
the distance between the query embedding and a stored embedding is 0
(identical vectors). Instead of raising a `ZeroDivisionError`, the
`score` is now set to infinity, which represents maximum similarity.

Closes #2381

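As a sketch, the guarded score computation reduces to the following
(`distance_to_score` is a hypothetical helper name; the actual change
inlines this expression on `doc.metadata.distance`):

```
def distance_to_score(distance: float) -> float:
    # Weaviate distances are non-negative and 0 means identical vectors,
    # so map 0 to infinite similarity instead of dividing by zero.
    return 1.0 / distance if distance != 0 else float("inf")

assert distance_to_score(0.5) == 2.0           # smaller distance -> larger score
assert distance_to_score(0.0) == float("inf")  # identical vectors -> max similarity
```
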
## Test Plan
Check out this PR and execute the code below; it will no longer raise a
`ZeroDivisionError`:

```
from llama_stack_client import LlamaStackClient

base_url = "http://localhost:8321"
client = LlamaStackClient(base_url=base_url)

models = client.models.list()
embedding_model = next(m for m in models if m.model_type == "embedding").identifier
embedding_dimension = 384

_ = client.vector_dbs.register(
    vector_db_id="foo_db",
    embedding_model=embedding_model,
    embedding_dimension=embedding_dimension,
    provider_id="weaviate",
)

chunk = {
    "content": "foo",
    "mime_type": "text/plain",
    "metadata": {"document_id": "foo-id"},
}

client.vector_io.insert(vector_db_id="foo_db", chunks=[chunk])
client.vector_io.query(vector_db_id="foo_db", query="foo")
```
---
 .../remote/vector_io/weaviate/weaviate.py     |  2 +-
 tests/integration/vector_io/test_vector_io.py | 33 +++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index e6fe8ccd3..6f2027dad 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -76,7 +76,7 @@ class WeaviateIndex(EmbeddingIndex):
                 continue
 
             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance)
+            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index f1cac9701..f550cf666 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -154,3 +154,36 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     assert len(response.chunks) > 0
     assert response.chunks[0].metadata["document_id"] == "doc1"
     assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+    vector_db_id = "test_precomputed_embeddings_db"
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=384,
+    )
+
+    chunks_with_embeddings = [
+        Chunk(
+            content="duplicate",
+            metadata={"document_id": "doc1", "source": "precomputed"},
+            embedding=[0.1] * 384,
+        ),
+    ]
+
+    client_with_empty_registry.vector_io.insert(
+        vector_db_id=vector_db_id,
+        chunks=chunks_with_embeddings,
+    )
+
+    response = client_with_empty_registry.vector_io.query(
+        vector_db_id=vector_db_id,
+        query="duplicate",
+    )
+
+    # Verify the top result is the expected document
+    assert response is not None
+    assert len(response.chunks) > 0
+    assert response.chunks[0].metadata["document_id"] == "doc1"
+    assert response.chunks[0].metadata["source"] == "precomputed"