Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 19:04:19 +00:00)
Merge branch 'main' into vest_search

Commit 7893641de1
5 changed files with 42 additions and 6 deletions
.github/workflows/test-external-providers.yml (vendored, 10 changes)
@@ -45,20 +45,22 @@ jobs:

       - name: Build distro from config file
         run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

       - name: Start Llama Stack server in background
         if: ${{ matrix.image-type }} == 'venv'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          uv run pip list
-          nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          # Use the virtual environment created by the build step (name comes from build config)
+          source ci-test/bin/activate
+          uv pip list
+          nohup llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
         run: |
           for i in {1..30}; do
-            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
+            if ! grep -q "Successfully loaded external provider remote::custom_ollama" server.log; then
               echo "Waiting for Llama Stack server to load the provider..."
               sleep 1
             else
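For reference, a minimal Python sketch (not part of the workflow, which does this in shell) of the same readiness check: poll server.log until the provider-loaded message that the updated grep looks for appears, giving up after roughly 30 seconds.

    import time

    def wait_for_marker(log_path: str, marker: str, attempts: int = 30) -> bool:
        # Poll the log file once per second until the marker line shows up.
        for _ in range(attempts):
            try:
                with open(log_path) as f:
                    if marker in f.read():
                        return True
            except FileNotFoundError:
                pass  # the server may not have created the log yet
            time.sleep(1)
        return False

    if not wait_for_marker("server.log", "Successfully loaded external provider remote::custom_ollama"):
        raise RuntimeError("Llama Stack server did not load the external provider in time")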
@@ -180,6 +180,7 @@ def get_provider_registry(
                     if provider_type_key in ret[api]:
                         logger.warning(f"Overriding already registered provider {provider_type_key} for {api.name}")
                     ret[api][provider_type_key] = spec
+                    logger.info(f"Successfully loaded external provider {provider_type_key}")
                 except yaml.YAMLError as yaml_err:
                     logger.error(f"Failed to parse YAML file {spec_path}: {yaml_err}")
                     raise yaml_err
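A self-contained sketch of the merge behaviour shown above (the helper name and dict-based registry are assumptions, not the actual get_provider_registry code): an external provider spec replaces any entry already registered for the same provider type, a warning is logged on override, and the new info line, which the CI workflow now greps for, confirms the load.

    import logging

    logger = logging.getLogger(__name__)

    def register_external_provider(registry: dict, api_name: str, provider_type_key: str, spec: object) -> None:
        # Hypothetical helper illustrating the hunk above.
        api_registry = registry.setdefault(api_name, {})
        if provider_type_key in api_registry:
            logger.warning(f"Overriding already registered provider {provider_type_key} for {api_name}")
        api_registry[provider_type_key] = spec
        logger.info(f"Successfully loaded external provider {provider_type_key}")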
@@ -115,7 +115,7 @@ def parse_environment_config(env_config: str) -> dict[str, int]:

 class CustomRichHandler(RichHandler):
     def __init__(self, *args, **kwargs):
-        kwargs["console"] = Console(width=120)
+        kwargs["console"] = Console(width=150)
         super().__init__(*args, **kwargs)

     def emit(self, record):
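The pattern above in isolation: a RichHandler subclass that pins the Rich Console to a fixed width so log lines wrap consistently regardless of the terminal; the diff only bumps that width from 120 to 150. A minimal sketch using the rich library (the class name here is illustrative):

    import logging

    from rich.console import Console
    from rich.logging import RichHandler

    class FixedWidthRichHandler(RichHandler):
        def __init__(self, *args, **kwargs):
            # Force a fixed-width console instead of auto-detecting the terminal size.
            kwargs["console"] = Console(width=150)
            super().__init__(*args, **kwargs)

    logging.basicConfig(level="INFO", handlers=[FixedWidthRichHandler()])
    logging.getLogger(__name__).info("log lines now wrap at column 150")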
@@ -76,7 +76,7 @@ class WeaviateIndex(EmbeddingIndex):
                 continue

             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance)
+            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))

         return QueryChunksResponse(chunks=chunks, scores=scores)

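A minimal sketch of the scoring change above: Weaviate reports a distance per result, which the index converts to a similarity-style score as 1/distance. An exact match can come back with distance 0, so the guarded expression returns float("inf") instead of raising ZeroDivisionError (the function name is illustrative, not from the codebase):

    def distance_to_score(distance: float) -> float:
        # Smaller distance -> larger score; identical vectors (distance 0) score infinity.
        return 1.0 / distance if distance != 0 else float("inf")

    assert distance_to_score(0.5) == 2.0
    assert distance_to_score(0.0) == float("inf")  # exact matches rank first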
@@ -154,3 +154,36 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
     assert len(response.chunks) > 0
     assert response.chunks[0].metadata["document_id"] == "doc1"
     assert response.chunks[0].metadata["source"] == "precomputed"
+
+
+def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(client_with_empty_registry, embedding_model_id):
+    vector_db_id = "test_precomputed_embeddings_db"
+    client_with_empty_registry.vector_dbs.register(
+        vector_db_id=vector_db_id,
+        embedding_model=embedding_model_id,
+        embedding_dimension=384,
+    )
+
+    chunks_with_embeddings = [
+        Chunk(
+            content="duplicate",
+            metadata={"document_id": "doc1", "source": "precomputed"},
+            embedding=[0.1] * 384,
+        ),
+    ]
+
+    client_with_empty_registry.vector_io.insert(
+        vector_db_id=vector_db_id,
+        chunks=chunks_with_embeddings,
+    )
+
+    response = client_with_empty_registry.vector_io.query(
+        vector_db_id=vector_db_id,
+        query="duplicate",
+    )
+
+    # Verify the top result is the expected document
+    assert response is not None
+    assert len(response.chunks) > 0
+    assert response.chunks[0].metadata["document_id"] == "doc1"
+    assert response.chunks[0].metadata["source"] == "precomputed"