From a7ed86181c8b823ca13af0c911be694e49899be9 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Mon, 14 Jul 2025 18:58:23 +0100 Subject: [PATCH 1/2] fix(faiss): Delete file contents from kvstore (#2686) Remove both the metadata and content from the kvstore when a file is being removed from the vector store. Closes: #2685 Also add faiss provider to openai_vector_stores test suite --------- Signed-off-by: Derek Higgins Co-authored-by: raghotham --- .../providers/inline/vector_io/faiss/faiss.py | 21 +++++++-- tests/unit/providers/vector_io/conftest.py | 44 ++++++++++++++++++- .../test_vector_io_openai_vector_stores.py | 6 ++- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 62a98413d..0306d9156 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -267,6 +267,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr assert self.kvstore is not None key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}" await self.kvstore.set(key=key, value=json.dumps(store_info)) + self.openai_vector_stores[store_id] = store_info async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]: """Load all vector store metadata from kvstore.""" @@ -286,17 +287,20 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr assert self.kvstore is not None key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}" await self.kvstore.set(key=key, value=json.dumps(store_info)) + self.openai_vector_stores[store_id] = store_info async def _delete_openai_vector_store_from_storage(self, store_id: str) -> None: """Delete vector store metadata from kvstore.""" assert self.kvstore is not None key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}" await self.kvstore.delete(key) + if store_id in self.openai_vector_stores: + del self.openai_vector_stores[store_id] async def _save_openai_vector_store_file( self, store_id: str, file_id: str, file_info: dict[str, Any], file_contents: list[dict[str, Any]] ) -> None: - """Save vector store file metadata to kvstore.""" + """Save vector store file data to kvstore.""" assert self.kvstore is not None key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}" await self.kvstore.set(key=key, value=json.dumps(file_info)) @@ -324,7 +328,16 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr await self.kvstore.set(key=key, value=json.dumps(file_info)) async def _delete_openai_vector_store_file_from_storage(self, store_id: str, file_id: str) -> None: - """Delete vector store file metadata from kvstore.""" + """Delete vector store data from kvstore.""" assert self.kvstore is not None - key = f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}" - await self.kvstore.delete(key) + + keys_to_delete = [ + f"{OPENAI_VECTOR_STORES_FILES_PREFIX}{store_id}:{file_id}", + f"{OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX}{store_id}:{file_id}", + ] + for key in keys_to_delete: + try: + await self.kvstore.delete(key) + except Exception as e: + logger.warning(f"Failed to delete key {key}: {e}") + continue diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index 4a9639326..9f86f877d 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -12,6 +12,8 @@ from pymilvus import MilvusClient, connections from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata +from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig +from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter @@ -90,7 +92,7 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata): return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata]) -@pytest.fixture(params=["milvus", "sqlite_vec"]) +@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"]) def vector_provider(request): return request.param @@ -116,7 +118,7 @@ async def unique_kvstore_config(tmp_path_factory): @pytest.fixture(scope="session") def sqlite_vec_db_path(tmp_path_factory): - db_path = str(tmp_path_factory.getbasetemp() / "test.db") + db_path = str(tmp_path_factory.getbasetemp() / "test_sqlite_vec.db") return db_path @@ -198,11 +200,49 @@ async def milvus_vec_adapter(milvus_vec_db_path, mock_inference_api): await adapter.shutdown() +@pytest.fixture +def faiss_vec_db_path(tmp_path_factory): + db_path = str(tmp_path_factory.getbasetemp() / "test_faiss.db") + return db_path + + +@pytest.fixture +async def faiss_vec_index(embedding_dimension): + index = FaissIndex(embedding_dimension) + yield index + await index.delete() + + +@pytest.fixture +async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension): + config = FaissVectorIOConfig( + kvstore=unique_kvstore_config, + ) + adapter = FaissVectorIOAdapter( + config=config, + inference_api=mock_inference_api, + files_api=None, + ) + await adapter.initialize() + await adapter.register_vector_db( + VectorDB( + identifier=f"faiss_test_collection_{np.random.randint(1e6)}", + provider_id="test_provider", + embedding_model="test_model", + embedding_dimension=embedding_dimension, + ) + ) + yield adapter + await adapter.shutdown() + + @pytest.fixture def vector_io_adapter(vector_provider, request): """Returns the appropriate vector IO adapter based on the provider parameter.""" if vector_provider == "milvus": return request.getfixturevalue("milvus_vec_adapter") + elif vector_provider == "faiss": + return request.getfixturevalue("faiss_vec_adapter") else: return request.getfixturevalue("sqlite_vec_adapter") diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 97e2f085e..bf7663d2e 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -94,7 +94,7 @@ async def test_query_unregistered_raises(vector_io_adapter): async def test_insert_chunks_calls_underlying_index(vector_io_adapter): fake_index = AsyncMock() - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index) + vector_io_adapter.cache["db1"] = fake_index chunks = ["chunk1", "chunk2"] await vector_io_adapter.insert_chunks("db1", chunks) @@ -112,7 +112,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter): expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1]) fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected)) - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index) + vector_io_adapter.cache["db1"] = fake_index response = await vector_io_adapter.query_chunks("db1", "my_query", {"param": 1}) @@ -286,5 +286,7 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) await vector_io_adapter._delete_openai_vector_store_file_from_storage(store_id, file_id) + loaded_file_info = await vector_io_adapter._load_openai_vector_store_file(store_id, file_id) + assert loaded_file_info == {} loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id) assert loaded_contents == [] From f731f369a2be1ad1d28ff8b3e1b9f59ae261451e Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Mon, 14 Jul 2025 14:38:53 -0400 Subject: [PATCH 2/2] feat: add infrastructure to allow inference model discovery (#2710) # What does this PR do? inference providers each have a static list of supported / known models. some also have access to a dynamic list of currently available models. this change gives prodivers using the ModelRegistryHelper the ability to combine their static and dynamic lists. for instance, OpenAIInferenceAdapter can implement ``` def query_available_models(self) -> list[str]: return [entry.model for entry in self.openai_client.models.list()] ``` to augment its static list w/ a current list from openai. ## Test Plan scripts/unit-test.sh --- .../utils/inference/model_registry.py | 33 ++++++- .../providers/utils/test_model_registry.py | 91 +++++++++++++++++++ 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index c2fc13e07..801b8ea06 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -83,9 +83,37 @@ class ModelRegistryHelper(ModelsProtocolPrivate): def get_llama_model(self, provider_model_id: str) -> str | None: return self.provider_id_to_llama_model_map.get(provider_model_id, None) + async def check_model_availability(self, model: str) -> bool: + """ + Check if a specific model is available from the provider (non-static check). + + This is for subclassing purposes, so providers can check if a specific + model is currently available for use through dynamic means (e.g., API calls). + + This method should NOT check statically configured model entries in + `self.alias_to_provider_id_map` - that is handled separately in register_model. + + Default implementation returns False (no dynamic models available). + + :param model: The model identifier to check. + :return: True if the model is available dynamically, False otherwise. + """ + return False + async def register_model(self, model: Model) -> Model: - if not (supported_model_id := self.get_provider_model_id(model.provider_resource_id)): - raise UnsupportedModelError(model.provider_resource_id, self.alias_to_provider_id_map.keys()) + # Check if model is supported in static configuration + supported_model_id = self.get_provider_model_id(model.provider_resource_id) + + # If not found in static config, check if it's available dynamically from provider + if not supported_model_id: + if await self.check_model_availability(model.provider_resource_id): + supported_model_id = model.provider_resource_id + else: + # note: we cannot provide a complete list of supported models without + # getting a complete list from the provider, so we return "..." + all_supported_models = [*self.alias_to_provider_id_map.keys(), "..."] + raise UnsupportedModelError(model.provider_resource_id, all_supported_models) + provider_resource_id = self.get_provider_model_id(model.model_id) if model.model_type == ModelType.embedding: # embedding models are always registered by their provider model id and does not need to be mapped to a llama model @@ -114,6 +142,7 @@ class ModelRegistryHelper(ModelsProtocolPrivate): ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR[llama_model] ) + # Register the model alias, ensuring it maps to the correct provider model id self.alias_to_provider_id_map[model.model_id] = supported_model_id return model diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index e11f95d49..1a1705961 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -87,6 +87,37 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: Prov return ModelRegistryHelper([known_provider_model, known_provider_model2]) +class MockModelRegistryHelperWithDynamicModels(ModelRegistryHelper): + """Test helper that simulates a provider with dynamically available models.""" + + def __init__(self, model_entries: list[ProviderModelEntry], available_models: list[str]): + super().__init__(model_entries) + self._available_models = available_models + + async def check_model_availability(self, model: str) -> bool: + return model in self._available_models + + +@pytest.fixture +def dynamic_model() -> Model: + """A model that's not in static config but available dynamically.""" + return Model( + provider_id="provider", + identifier="dynamic-model", + provider_resource_id="dynamic-provider-id", + ) + + +@pytest.fixture +def helper_with_dynamic_models( + known_provider_model: ProviderModelEntry, known_provider_model2: ProviderModelEntry, dynamic_model: Model +) -> MockModelRegistryHelperWithDynamicModels: + """Helper that includes dynamically available models.""" + return MockModelRegistryHelperWithDynamicModels( + [known_provider_model, known_provider_model2], [dynamic_model.provider_resource_id] + ) + + async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: assert helper.get_provider_model_id(unknown_model.model_id) is None @@ -151,3 +182,63 @@ async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_m assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id await helper.unregister_model(known_model.provider_resource_id) assert helper.get_provider_model_id(known_model.provider_resource_id) is None + + +async def test_register_model_from_check_model_availability( + helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model +) -> None: + """Test that models returned by check_model_availability can be registered.""" + # Verify the model is not in static config + assert helper_with_dynamic_models.get_provider_model_id(dynamic_model.provider_resource_id) is None + + # But it should be available via check_model_availability + is_available = await helper_with_dynamic_models.check_model_availability(dynamic_model.provider_resource_id) + assert is_available + + # Registration should succeed + registered_model = await helper_with_dynamic_models.register_model(dynamic_model) + assert registered_model == dynamic_model + + # Model should now be registered and accessible + assert ( + helper_with_dynamic_models.get_provider_model_id(dynamic_model.model_id) == dynamic_model.provider_resource_id + ) + + +async def test_register_model_not_in_static_or_dynamic( + helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, unknown_model: Model +) -> None: + """Test that models not in static config or dynamic models are rejected.""" + # Verify the model is not in static config + assert helper_with_dynamic_models.get_provider_model_id(unknown_model.provider_resource_id) is None + + # And not available via check_model_availability + is_available = await helper_with_dynamic_models.check_model_availability(unknown_model.provider_resource_id) + assert not is_available + + # Registration should fail with comprehensive error message + with pytest.raises(Exception) as exc_info: # UnsupportedModelError + await helper_with_dynamic_models.register_model(unknown_model) + + # Error should include static models and "..." for dynamic models + error_str = str(exc_info.value) + assert "..." in error_str # "..." should be in error message + + +async def test_register_alias_for_dynamic_model( + helper_with_dynamic_models: MockModelRegistryHelperWithDynamicModels, dynamic_model: Model +) -> None: + """Test that we can register an alias that maps to a dynamically available model.""" + # Create a model with a different identifier but same provider_resource_id + alias_model = Model( + provider_id=dynamic_model.provider_id, + identifier="dynamic-model-alias", + provider_resource_id=dynamic_model.provider_resource_id, + ) + + # Registration should succeed since the provider_resource_id is available dynamically + registered_model = await helper_with_dynamic_models.register_model(alias_model) + assert registered_model == alias_model + + # Both the original provider_resource_id and the new alias should work + assert helper_with_dynamic_models.get_provider_model_id(alias_model.model_id) == dynamic_model.provider_resource_id