Merge branch 'main' into feat/litellm_sambanova_usage

Jorge Piedrahita Ortiz 2025-03-19 16:51:59 -05:00 committed by GitHub
commit 02a4f9ac59
69 changed files with 1128 additions and 445 deletions


@@ -165,7 +165,10 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase):
         request.model = MODEL
         request.tool_config.tool_prompt_format = ToolPromptFormat.json
         prompt = await chat_completion_request_to_prompt(request, request.model)
-        self.assertIn('{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', prompt)
+        self.assertIn(
+            '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}',
+            prompt,
+        )

     async def test_user_provided_system_message(self):
         content = "Hello !"


@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import random

import numpy as np
import pytest

from llama_stack.apis.vector_io import Chunk

EMBEDDING_DIMENSION = 384


@pytest.fixture
def vector_db_id() -> str:
    return f"test-vector-db-{random.randint(1, 100)}"


@pytest.fixture(scope="session")
def embedding_dimension() -> int:
    return EMBEDDING_DIMENSION


@pytest.fixture(scope="session")
def sample_chunks():
    """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
    n, k = 10, 3
    sample = [
        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
        for j in range(k)
        for i in range(n)
    ]
    return sample


@pytest.fixture(scope="session")
def sample_embeddings(sample_chunks):
    np.random.seed(42)
    return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks])
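These fixtures are hoisted into what is evidently a shared conftest.py so both the Qdrant and SQLite-vec unit tests can reuse them. For orientation, a hypothetical test consuming them might look like this (a sketch, not part of the commit):

import numpy as np
import pytest


@pytest.mark.asyncio
async def test_shared_fixture_shapes(sample_chunks, sample_embeddings, embedding_dimension):
    # sample_chunks yields 3 documents x 10 sentences = 30 chunks
    assert len(sample_chunks) == 30
    # one seeded (np.random.seed(42)) float32 embedding per chunk
    assert sample_embeddings.shape == (len(sample_chunks), embedding_dimension)
    assert sample_embeddings.dtype == np.float32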


@@ -0,0 +1,135 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import asyncio
import os
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
import pytest_asyncio

from llama_stack.apis.inference import EmbeddingsResponse, Inference
from llama_stack.apis.vector_io import (
    QueryChunksResponse,
    VectorDB,
    VectorDBStore,
)
from llama_stack.providers.inline.vector_io.qdrant.config import (
    QdrantVectorIOConfig as InlineQdrantVectorIOConfig,
)
from llama_stack.providers.remote.vector_io.qdrant.qdrant import (
    QdrantVectorIOAdapter,
)

# This test is a unit test for the QdrantVectorIOAdapter class. It should only contain
# tests which are specific to this class. More general (API-level) tests should be placed in
# tests/integration/vector_io/
#
# How to run this test:
#
# pytest tests/unit/providers/vector_io/test_qdrant.py \
#     -v -s --tb=short --disable-warnings --asyncio-mode=auto


@pytest.fixture
def qdrant_config(tmp_path) -> InlineQdrantVectorIOConfig:
    return InlineQdrantVectorIOConfig(path=os.path.join(tmp_path, "qdrant.db"))


@pytest.fixture(scope="session")
def loop():
    return asyncio.new_event_loop()


@pytest.fixture
def mock_vector_db(vector_db_id) -> MagicMock:
    mock_vector_db = MagicMock(spec=VectorDB)
    mock_vector_db.embedding_model = "embedding_model"
    mock_vector_db.identifier = vector_db_id
    return mock_vector_db


@pytest.fixture
def mock_vector_db_store(mock_vector_db) -> MagicMock:
    mock_store = MagicMock(spec=VectorDBStore)
    mock_store.get_vector_db = AsyncMock(return_value=mock_vector_db)
    return mock_store


@pytest.fixture
def mock_api_service(sample_embeddings):
    mock_api_service = MagicMock(spec=Inference)
    mock_api_service.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings))
    return mock_api_service


@pytest_asyncio.fixture
async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter:
    adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service)
    adapter.vector_db_store = mock_vector_db_store
    await adapter.initialize()
    yield adapter
    await adapter.shutdown()


__QUERY = "Sample query"


@pytest.mark.asyncio
@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)])
async def test_qdrant_adapter_returns_expected_chunks(
    qdrant_adapter: QdrantVectorIOAdapter,
    vector_db_id,
    sample_chunks,
    sample_embeddings,
    max_query_chunks,
    expected_chunks,
) -> None:
    assert qdrant_adapter is not None
    await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks)

    index = await qdrant_adapter._get_and_cache_vector_db_index(vector_db_id=vector_db_id)
    assert index is not None

    response = await qdrant_adapter.query_chunks(
        query=__QUERY,
        vector_db_id=vector_db_id,
        params={"max_chunks": max_query_chunks},
    )
    assert isinstance(response, QueryChunksResponse)
    assert len(response.chunks) == expected_chunks


# To bypass the attempt to convert a Mock to JSON
def _prepare_for_json(value: Any) -> str:
    return str(value)


@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
@pytest.mark.asyncio
async def test_qdrant_register_and_unregister_vector_db(
    qdrant_adapter: QdrantVectorIOAdapter,
    mock_vector_db,
    sample_chunks,
) -> None:
    # Initially, no collections
    vector_db_id = mock_vector_db.identifier
    assert len((await qdrant_adapter.client.get_collections()).collections) == 0

    # Register does not create a collection
    assert not (await qdrant_adapter.client.collection_exists(vector_db_id))
    await qdrant_adapter.register_vector_db(mock_vector_db)
    assert not (await qdrant_adapter.client.collection_exists(vector_db_id))

    # First insert creates the collection
    await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks)
    assert await qdrant_adapter.client.collection_exists(vector_db_id)

    # Unregister deletes the collection
    await qdrant_adapter.unregister_vector_db(vector_db_id)
    assert not (await qdrant_adapter.client.collection_exists(vector_db_id))
    assert len((await qdrant_adapter.client.get_collections()).collections) == 0
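The register/unregister test pins down a lazy-creation contract: registering a vector DB stores metadata only, the Qdrant collection materializes on first insert, and unregistering drops it. A minimal sketch of that pattern against the qdrant-client API (an illustration of the contract, not the adapter's actual code; the dimension constant is assumed from the shared test fixtures):

from qdrant_client import AsyncQdrantClient, models

EMBEDDING_DIMENSION = 384  # assumption: matches the shared test fixtures


async def ensure_collection(client: AsyncQdrantClient, name: str) -> None:
    # Called from the insert path, never at register time, so an empty
    # vector DB never owns a Qdrant collection.
    if not await client.collection_exists(name):
        await client.create_collection(
            collection_name=name,
            vectors_config=models.VectorParams(
                size=EMBEDDING_DIMENSION,
                distance=models.Distance.COSINE,
            ),
        )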


@@ -29,8 +29,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
 #     -v -s --tb=short --disable-warnings --asyncio-mode=auto

 SQLITE_VEC_PROVIDER = "sqlite_vec"
-EMBEDDING_DIMENSION = 384
 EMBEDDING_MODEL = "all-MiniLM-L6-v2"

 @pytest.fixture(scope="session")
@@ -50,26 +48,8 @@ def sqlite_connection(loop):
 @pytest_asyncio.fixture(scope="session", autouse=True)
-async def sqlite_vec_index(sqlite_connection):
-    return await SQLiteVecIndex.create(dimension=EMBEDDING_DIMENSION, connection=sqlite_connection, bank_id="test_bank")
-
-
-@pytest.fixture(scope="session")
-def sample_chunks():
-    """Generates chunks that force multiple batches for a single document to expose ID conflicts."""
-    n, k = 10, 3
-    sample = [
-        Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"})
-        for j in range(k)
-        for i in range(n)
-    ]
-    return sample
-
-
-@pytest.fixture(scope="session")
-def sample_embeddings(sample_chunks):
-    np.random.seed(42)
-    return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks])
+async def sqlite_vec_index(sqlite_connection, embedding_dimension):
+    return await SQLiteVecIndex.create(dimension=embedding_dimension, connection=sqlite_connection, bank_id="test_bank")

 @pytest.mark.asyncio
@@ -82,21 +62,21 @@ async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
 @pytest.mark.asyncio
-async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
+async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension):
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
-    query_embedding = np.random.rand(EMBEDDING_DIMENSION).astype(np.float32)
+    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
     response = await sqlite_vec_index.query(query_embedding, k=2, score_threshold=0.0)
     assert isinstance(response, QueryChunksResponse)
     assert len(response.chunks) == 2

 @pytest.mark.asyncio
-async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks):
+async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension):
     """Test that chunk IDs do not conflict across batches when inserting chunks."""
     # Reduce batch size to force multiple batches for same document
     # since there are 10 chunks per document and batch size is 2
     batch_size = 2
-    sample_embeddings = np.random.rand(len(sample_chunks), EMBEDDING_DIMENSION).astype(np.float32)
+    sample_embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32)
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings, batch_size=batch_size)
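The chunk-ID conflict test guards against a subtle batching bug: if chunk IDs are derived from a batch-local index, chunk 0 of the second batch collides with chunk 0 of the first for the same document. A toy illustration of the failure mode and the fix (hypothetical ID schemes, not SQLiteVecIndex's actual code):

def batch_local_ids(num_chunks: int, batch_size: int) -> list[str]:
    # Buggy scheme: the index resets at each batch boundary, so IDs repeat.
    ids = []
    for start in range(0, num_chunks, batch_size):
        for i in range(start, min(start + batch_size, num_chunks)):
            ids.append(f"document-0:chunk-{i - start}")
    return ids


def global_ids(num_chunks: int) -> list[str]:
    # Fixed scheme: a document-global index keeps IDs unique across batches.
    return [f"document-0:chunk-{i}" for i in range(num_chunks)]


assert len(set(batch_local_ids(10, 2))) == 2  # collisions: only 2 distinct IDs
assert len(set(global_ids(10))) == 10  # all unique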


@@ -0,0 +1,124 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from unittest.mock import AsyncMock, patch

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from llama_stack.distribution.server.auth import AuthenticationMiddleware


@pytest.fixture
def mock_auth_endpoint():
    return "http://mock-auth-service/validate"


@pytest.fixture
def valid_api_key():
    return "valid_api_key_12345"


@pytest.fixture
def invalid_api_key():
    return "invalid_api_key_67890"


@pytest.fixture
def app(mock_auth_endpoint):
    app = FastAPI()
    app.add_middleware(AuthenticationMiddleware, auth_endpoint=mock_auth_endpoint)

    @app.get("/test")
    def test_endpoint():
        return {"message": "Authentication successful"}

    return app


@pytest.fixture
def client(app):
    return TestClient(app)


async def mock_post_success(*args, **kwargs):
    mock_response = AsyncMock()
    mock_response.status_code = 200
    return mock_response


async def mock_post_failure(*args, **kwargs):
    mock_response = AsyncMock()
    mock_response.status_code = 401
    return mock_response


async def mock_post_exception(*args, **kwargs):
    raise Exception("Connection error")


def test_missing_auth_header(client):
    response = client.get("/test")
    assert response.status_code == 401
    assert "Missing or invalid Authorization header" in response.json()["error"]["message"]


def test_invalid_auth_header_format(client):
    response = client.get("/test", headers={"Authorization": "InvalidFormat token123"})
    assert response.status_code == 401
    assert "Missing or invalid Authorization header" in response.json()["error"]["message"]


@patch("httpx.AsyncClient.post", new=mock_post_success)
def test_valid_authentication(client, valid_api_key):
    response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
    assert response.status_code == 200
    assert response.json() == {"message": "Authentication successful"}


@patch("httpx.AsyncClient.post", new=mock_post_failure)
def test_invalid_authentication(client, invalid_api_key):
    response = client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
    assert response.status_code == 401
    assert "Authentication failed" in response.json()["error"]["message"]


@patch("httpx.AsyncClient.post", new=mock_post_exception)
def test_auth_service_error(client, valid_api_key):
    response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
    assert response.status_code == 401
    assert "Authentication service error" in response.json()["error"]["message"]


def test_auth_request_payload(client, valid_api_key, mock_auth_endpoint):
    with patch("httpx.AsyncClient.post") as mock_post:
        mock_response = AsyncMock()
        mock_response.status_code = 200
        mock_post.return_value = mock_response

        client.get(
            "/test?param1=value1&param2=value2",
            headers={
                "Authorization": f"Bearer {valid_api_key}",
                "User-Agent": "TestClient",
                "Content-Type": "application/json",
            },
        )

        # Check that the auth endpoint was called with the correct payload
        call_args = mock_post.call_args
        assert call_args is not None

        url, kwargs = call_args[0][0], call_args[1]
        assert url == mock_auth_endpoint

        payload = kwargs["json"]
        assert payload["api_key"] == valid_api_key
        assert payload["request"]["path"] == "/test"
        assert "authorization" in payload["request"]["headers"]
        assert "param1" in payload["request"]["params"]
        assert "param2" in payload["request"]["params"]