Make embedding generation go through inference (#606)

This PR does the following: 1) adds the ability to generate embeddings in all supported inference providers. 2) Moves all the memory providers to use the inference API and improved the memory tests to setup the inference stack correctly and use the embedding models This is a merge from #589 and #598
2025-12-04 10:10:36 +00:00 · 2024-12-12 11:47:50 -08:00 · 2024-12-12 11:47:50 -08:00 · 96e158eaac
commit 96e158eaac
parent a14785af46
37 changed files with 677 additions and 156 deletions
--- a/llama_stack/providers/tests/memory/conftest.py
+++ b/llama_stack/providers/tests/memory/conftest.py
@ -6,9 +6,65 @@

 import pytest

+from ..conftest import get_provider_fixture_overrides
+
+from ..inference.fixtures import INFERENCE_FIXTURES
 from .fixtures import MEMORY_FIXTURES


+DEFAULT_PROVIDER_COMBINATIONS = [
+    pytest.param(
+        {
+            "inference": "meta_reference",
+            "memory": "faiss",
+        },
+        id="meta_reference",
+        marks=pytest.mark.meta_reference,
+    ),
+    pytest.param(
+        {
+            "inference": "ollama",
+            "memory": "pgvector",
+        },
+        id="ollama",
+        marks=pytest.mark.ollama,
+    ),
+    pytest.param(
+        {
+            "inference": "together",
+            "memory": "chroma",
+        },
+        id="chroma",
+        marks=pytest.mark.chroma,
+    ),
+    pytest.param(
+        {
+            "inference": "bedrock",
+            "memory": "qdrant",
+        },
+        id="qdrant",
+        marks=pytest.mark.qdrant,
+    ),
+    pytest.param(
+        {
+            "inference": "fireworks",
+            "memory": "weaviate",
+        },
+        id="weaviate",
+        marks=pytest.mark.weaviate,
+    ),
+]
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--inference-model",
+        action="store",
+        default=None,
+        help="Specify the inference model to use for testing",
+    )
+
+
 def pytest_configure(config):
    for fixture_name in MEMORY_FIXTURES:
        config.addinivalue_line(
@ -18,12 +74,22 @@ def pytest_configure(config):


 def pytest_generate_tests(metafunc):
+    if "inference_model" in metafunc.fixturenames:
+        model = metafunc.config.getoption("--inference-model")
+        if not model:
+            raise ValueError(
+                "No inference model specified. Please provide a valid inference model."
+            )
+        params = [pytest.param(model, id="")]
+
+        metafunc.parametrize("inference_model", params, indirect=True)
    if "memory_stack" in metafunc.fixturenames:
-        metafunc.parametrize(
-            "memory_stack",
-            [
-                pytest.param(fixture_name, marks=getattr(pytest.mark, fixture_name))
-                for fixture_name in MEMORY_FIXTURES
-            ],
-            indirect=True,
+        available_fixtures = {
+            "inference": INFERENCE_FIXTURES,
+            "memory": MEMORY_FIXTURES,
+        }
+        combinations = (
+            get_provider_fixture_overrides(metafunc.config, available_fixtures)
+            or DEFAULT_PROVIDER_COMBINATIONS
        )
+        metafunc.parametrize("memory_stack", combinations, indirect=True)