From a7ed86181c8b823ca13af0c911be694e49899be9 Mon Sep 17 00:00:00 2001
From: Derek Higgins <derekh@redhat.com>
Date: Mon, 14 Jul 2025 18:58:23 +0100
Subject: [PATCH 1/2] fix(faiss): Delete file contents from kvstore (#2686)

Remove both the metadata and content from the kvstore when a file is
being removed from the vector store.

Closes: #2685

Also add faiss provider to openai_vector_stores test suite

---------

Signed-off-by: Derek Higgins <derekh@redhat.com>
Co-authored-by: raghotham <rsm@meta.com>
---
 .../providers/inline/vector_io/faiss/faiss.py | 21 +++++++--
 tests/unit/providers/vector_io/conftest.py    | 44 ++++++++++++++++++-
 .../test_vector_io_openai_vector_stores.py    |  6 ++-
 3 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 62a98413d..0306d9156 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -267,6 +267,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         assert self.kvstore is not None
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.set(key=key, value=json.dumps(store_info))
+        self.openai_vector_stores[store_id] = store_info
 
     async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
         """Load all vector store metadata from kvstore."""
@@ -286,17 +287,20 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         assert self.kvstore is not None
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.set(key=key, value=json.dumps(store_info))
+        self.openai_vector_stores[store_id] = store_info
 
     async def _delete_openai_vector_store_from_storage(self, store_id: str) -> None:
         """Delete vector store metadata from kvstore."""
         assert self.kvstore is not None
         key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
         await self.kvstore.delete(key)
+        if store_id in self.openai_vector_stores:
+            del self.openai_vector_stores[store_id]
 
     async def _save_openai_vector_store_file(
         self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]]
     ) -> None:
-        """Save vector store file metadata to kvstore."""
+        """Save vector store file data to kvstore."""
         assert self.kvstore is not None
         key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
         await self.kvstore.set(key=key, value=json.dumps(file_info))
@@ -324,7 +328,16 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
         await self.kvstore.set(key=key, value=json.dumps(file_info))
 
     async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None:
-        """Delete vector store file metadata from kvstore."""
+        """Delete vector store data from kvstore."""
         assert self.kvstore is not None
-        key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}"
-        await self.kvstore.delete(key)
+
+        keys_to_delete = [
+            f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}",
+            f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}",
+        ]
+        for key in keys_to_delete:
+            try:
+                await self.kvstore.delete(key)
+            except Exception as e:
+                logger.warning(f"Failed to delete key {key}: {e}")
+                continue
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 4a9639326..9f86f877d 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -12,6 +12,8 @@ from pymilvus import MilvusClient, connections
 
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
 from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
@@ -90,7 +92,7 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
     return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])
 
 
-@pytest.fixture(params=["milvus", "sqlite_vec"])
+@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"])
 def vector_provider(request):
     return request.param
 
@@ -116,7 +118,7 @@ async def unique_kvstore_config(tmp_path_factory):
 
 @pytest.fixture(scope="session")
 def sqlite_vec_db_path(tmp_path_factory):
-    db_path = str(tmp_path_factory.getbasetemp() / "test.db")
+    db_path = str(tmp_path_factory.getbasetemp() / "test_sqlite_vec.db")
     return db_path
 
 
@@ -198,11 +200,49 @@ async def milvus_vec_adapter(milvus_vec_db_path, mock_inference_api):
     await adapter.shutdown()
 
 
+@pytest.fixture
+def faiss_vec_db_path(tmp_path_factory):
+    db_path = str(tmp_path_factory.getbasetemp() / "test_faiss.db")
+    return db_path
+
+
+@pytest.fixture
+async def faiss_vec_index(embedding_dimension):
+    index = FaissIndex(embedding_dimension)
+    yield index
+    await index.delete()
+
+
+@pytest.fixture
+async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension):
+    config = FaissVectorIOConfig(
+        kvstore=unique_kvstore_config,
+    )
+    adapter = FaissVectorIOAdapter(
+        config=config,
+        inference_api=mock_inference_api,
+        files_api=None,
+    )
+    await adapter.initialize()
+    await adapter.register_vector_db(
+        VectorDB(
+            identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=embedding_dimension,
+        )
+    )
+    yield adapter
+    await adapter.shutdown()
+
+
 @pytest.fixture
 def vector_io_adapter(vector_provider, request):
     """Returns the appropriate vector IO adapter based on the provider parameter."""
     if vector_provider == "milvus":
         return request.getfixturevalue("milvus_vec_adapter")
+    elif vector_provider == "faiss":
+        return request.getfixturevalue("faiss_vec_adapter")
     else:
         return request.getfixturevalue("sqlite_vec_adapter")
 
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 97e2f085e..bf7663d2e 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -94,7 +94,7 @@ async def test_query_unregistered_raises(vector_io_adapter):
 
 async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
     fake_index = AsyncMock()
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index
 
     chunks = ["chunk1", "chunk2"]
     await vector_io_adapter.insert_chunks("db1", chunks)
@@ -112,7 +112,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
     expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index
 
     response = await vector_io_adapter.query_chunks("db1", "my_query", {"param": 1})
 
@@ -286,5 +286,7 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t
     await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
     await vector_io_adapter._delete_openai_vector_store_file_from_storage(store_id, file_id)
 
+    loaded_file_info = await vector_io_adapter._load_openai_vector_store_file(store_id, file_id)
+    assert loaded_file_info == {}
     loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
     assert loaded_contents == []

From f731f369a2be1ad1d28ff8b3e1b9f59ae261451e Mon Sep 17 00:00:00 2001
From: Matthew Farrellee <matt@cs.wisc.edu>
Date: Mon, 14 Jul 2025 14:38:53 -0400
Subject: [PATCH 2/2] feat: add infrastructure to allow inference model
 discovery (#2710)

# What does this PR do?

inference providers each have a static list of supported / known models.
some also have access to a dynamic list of currently available models.
this change gives prodivers using the ModelRegistryHelper the ability to
combine their static and dynamic lists.

for instance, OpenAIInferenceAdapter can implement
```
   def query_available_models(self) -> list[str]:
      return [entry.model for entry in self.openai_client.models.list()]
```
to augment its static list w/ a current list from openai.

## Test Plan

scripts/unit-test.sh
---
 .../utils/inference/model_registry.py         | 33 ++++++-
 .../providers/utils/test_model_registry.py    | 91 +++++++++++++++++++
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index c2fc13e07..801b8ea06 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -83,9 +83,37 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
     def get_llama_model(self, provider_model_id: str) -> str | None:
         return self.provider_id_to_llama_model_map.get(provider_model_id, None)
 
+    async def check_model_availability(self, model: str) -> bool:
+        """
+        Check if a specific model is available from the provider (non-static check).
+
+        This is for subclassing purposes, so providers can check if a specific
+        model is currently available for use through dynamic means (e.g., API calls).
+
+        This method should NOT check statically configured model entries in
+        `self.alias_to_provider_id_map` - that is handled separately in register_model.
+
+        Default implementation returns False (no dynamic models available).
+
+        :param model: The model identifier to check.
+        :return: True if the model is available dynamically, False otherwise.
+        """
+        return False
+
     async def register_model(self, model: Model) -> Model:
-        if not (supported_model_id := self.get_provider_model_id(model.provider_resource_id)):
-            raise UnsupportedModelError(model.provider_resource_id, self.alias_to_provider_id_map.keys())
+        # Check if model is supported in static configuration
+        supported_model_id = self.get_provider_model_id(model.provider_resource_id)
+
+        # If not found in static config, check if it's available dynamically from provider
+        if not supported_model_id:
+            if await self.check_model_availability(model.provider_resource_id):
+                supported_model_id = model.provider_resource_id
+            else:
+                # note: we cannot provide a complete list of supported models without
+                #       getting a complete list from the provider, so we return "..."
+                all_supported_models = [*self.alias_to_provider_id_map.keys(), "..."]
+                raise UnsupportedModelError(model.provider_resource_id, all_supported_models)
+
         provider_resource_id = self.get_provider_model_id(model.model_id)
         if model.model_type == ModelType.embedding:
             # embedding models are always registered by their provider model id and does not need to be mapped to a llama model
@@ -114,6 +142,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate):
                         ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model]
                     )
 
+        # Register the model alias, ensuring it maps to the correct provider model id
         self.alias_to_provider_id_map[model.model_id] = supported_model_id
 
         return model
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index e11f95d49..1a1705961 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -87,6 +87,37 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: Prov
     return ModelRegistryHelper([known_provider_model, known_provider_model2])
 
 
+class MockModelRegistryHelperWithDynamicModels(ModelRegistryHelper):
+    """Test helper that simulates a provider with dynamically available models."""
+
+    def __init__(self, model_entries: list[ProviderModelEntry], available_models: list[str]):
+        super().__init__(model_entries)
+        self._available_models = available_models
+
+    async def check_model_availability(self, model: str) -> bool:
+        return model in self._available_models
+
+
+@pytest.fixture
+def dynamic_model() -> Model:
+    """A model that's not in static config but available dynamically."""
+    return Model(
+        provider_id="provider",
+        identifier="dynamic-model",
+        provider_resource_id="dynamic-provider-id",
+    )
+
+
+@pytest.fixture
+def helper_with_dynamic_models(
+    known_provider_model: ProviderModelEntry, known_provider_model2: ProviderModelEntry, dynamic_model: Model
+) -> MockModelRegistryHelperWithDynamicModels:
+    """Helper that includes dynamically available models."""
+    return MockModelRegistryHelperWithDynamicModels(
+        [known_provider_model, known_provider_model2], [dynamic_model.provider_resource_id]
+    )
+
+
 async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
     assert helper.get_provider_model_id(unknown_model.model_id) is None
 
@@ -151,3 +182,63 @@ async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_m
     assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
     await helper.unregister_model(known_model.provider_resource_id)
     assert helper.get_provider_model_id(known_model.provider_resource_id) is None
+
+
+async def test_register_model_from_check_model_availability(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model
+) -> None:
+    """Test that models returned by check_model_availability can be registered."""
+    # Verify the model is not in static config
+    assert helper_with_dynamic_models.get_provider_model_id(dynamic_model.provider_resource_id) is None
+
+    # But it should be available via check_model_availability
+    is_available = await helper_with_dynamic_models.check_model_availability(dynamic_model.provider_resource_id)
+    assert is_available
+
+    # Registration should succeed
+    registered_model = await helper_with_dynamic_models.register_model(dynamic_model)
+    assert registered_model == dynamic_model
+
+    # Model should now be registered and accessible
+    assert (
+        helper_with_dynamic_models.get_provider_model_id(dynamic_model.model_id) == dynamic_model.provider_resource_id
+    )
+
+
+async def test_register_model_not_in_static_or_dynamic(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, unknown_model: Model
+) -> None:
+    """Test that models not in static config or dynamic models are rejected."""
+    # Verify the model is not in static config
+    assert helper_with_dynamic_models.get_provider_model_id(unknown_model.provider_resource_id) is None
+
+    # And not available via check_model_availability
+    is_available = await helper_with_dynamic_models.check_model_availability(unknown_model.provider_resource_id)
+    assert not is_available
+
+    # Registration should fail with comprehensive error message
+    with pytest.raises(Exception) as exc_info:  # UnsupportedModelError
+        await helper_with_dynamic_models.register_model(unknown_model)
+
+    # Error should include static models and "..." for dynamic models
+    error_str = str(exc_info.value)
+    assert "..." in error_str  # "..." should be in error message
+
+
+async def test_register_alias_for_dynamic_model(
+    helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model
+) -> None:
+    """Test that we can register an alias that maps to a dynamically available model."""
+    # Create a model with a different identifier but same provider_resource_id
+    alias_model = Model(
+        provider_id=dynamic_model.provider_id,
+        identifier="dynamic-model-alias",
+        provider_resource_id=dynamic_model.provider_resource_id,
+    )
+
+    # Registration should succeed since the provider_resource_id is available dynamically
+    registered_model = await helper_with_dynamic_models.register_model(alias_model)
+    assert registered_model == alias_model
+
+    # Both the original provider_resource_id and the new alias should work
+    assert helper_with_dynamic_models.get_provider_model_id(alias_model.model_id) == dynamic_model.provider_resource_id