From 122de785c4406d50bbfa4171ef60151ed940a61f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 20 Oct 2025 20:06:16 -0700 Subject: [PATCH] chore(cleanup)!: kill vector_db references as far as possible (#3864) There should not be "vector db" anywhere. --- client-sdks/stainless/openapi.yml | 12 +- .../llama_stack_client_cli_reference.md | 48 ----- docs/static/deprecated-llama-stack-spec.html | 4 +- docs/static/deprecated-llama-stack-spec.yaml | 4 +- .../static/experimental-llama-stack-spec.html | 4 +- .../static/experimental-llama-stack-spec.yaml | 4 +- docs/static/llama-stack-spec.html | 8 +- docs/static/llama-stack-spec.yaml | 8 +- docs/static/stainless-llama-stack-spec.html | 12 +- docs/static/stainless-llama-stack-spec.yaml | 12 +- llama_stack/apis/datatypes.py | 2 +- llama_stack/apis/resource.py | 4 +- llama_stack/apis/vector_dbs/vector_dbs.py | 93 --------- llama_stack/apis/vector_io/vector_io.py | 11 +- .../{vector_dbs => vector_stores}/__init__.py | 2 +- .../apis/vector_stores/vector_stores.py | 51 +++++ llama_stack/core/access_control/datatypes.py | 6 +- llama_stack/core/datatypes.py | 10 +- llama_stack/core/distribution.py | 2 +- llama_stack/core/resolver.py | 4 +- llama_stack/core/routers/__init__.py | 4 +- llama_stack/core/routers/tool_runtime.py | 12 +- llama_stack/core/routers/vector_io.py | 47 ++--- llama_stack/core/routing_tables/common.py | 12 +- .../{vector_dbs.py => vector_stores.py} | 105 ++++------ llama_stack/core/ui/page/playground/tools.py | 14 +- llama_stack/providers/datatypes.py | 8 +- .../providers/inline/vector_io/faiss/faiss.py | 53 +++-- .../inline/vector_io/sqlite_vec/sqlite_vec.py | 83 ++++---- .../remote/vector_io/chroma/chroma.py | 58 +++--- .../remote/vector_io/milvus/milvus.py | 74 +++---- .../remote/vector_io/pgvector/pgvector.py | 72 +++---- .../remote/vector_io/qdrant/qdrant.py | 96 ++++----- .../remote/vector_io/weaviate/weaviate.py | 70 +++---- .../utils/memory/openai_vector_store_mixin.py | 43 ++-- .../providers/utils/memory/vector_store.py | 18 +- tests/integration/conftest.py | 3 + tests/integration/vector_io/test_vector_io.py | 68 ++++--- tests/unit/core/routers/test_vector_io.py | 2 +- tests/unit/providers/vector_io/conftest.py | 34 ++-- tests/unit/providers/vector_io/test_faiss.py | 18 +- .../test_vector_io_openai_vector_stores.py | 74 +++---- tests/unit/rag/test_rag_query.py | 4 +- tests/unit/rag/test_vector_store.py | 54 ++--- tests/unit/registry/test_registry.py | 192 +++++++++--------- tests/unit/server/test_server.py | 4 +- 46 files changed, 701 insertions(+), 822 deletions(-) delete mode 100644 llama_stack/apis/vector_dbs/vector_dbs.py rename llama_stack/apis/{vector_dbs => vector_stores}/__init__.py (87%) create mode 100644 llama_stack/apis/vector_stores/vector_stores.py rename llama_stack/core/routing_tables/{vector_dbs.py => vector_stores.py} (70%) diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index eff01931f..93049a14a 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -6440,7 +6440,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9132,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9440,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -10203,7 +10203,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +11325,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12652,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/docs/references/llama_stack_client_cli_reference.md b/docs/docs/references/llama_stack_client_cli_reference.md index 9bb514a2d..a4321938a 100644 --- a/docs/docs/references/llama_stack_client_cli_reference.md +++ b/docs/docs/references/llama_stack_client_cli_reference.md @@ -32,7 +32,6 @@ Commands: scoring_functions Manage scoring functions. shields Manage safety shield services. toolgroups Manage available tool groups. - vector_dbs Manage vector databases. ``` ### `llama-stack-client configure` @@ -211,53 +210,6 @@ Unregister a model from distribution endpoint llama-stack-client models unregister ``` -## Vector DB Management -Manage vector databases. - - -### `llama-stack-client vector_dbs list` -Show available vector dbs on distribution endpoint -```bash -llama-stack-client vector_dbs list -``` -``` -┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │ -│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │ -│ │ │ │ │ type: vector_db │ -│ │ │ │ │ │ -└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘ -``` - -### `llama-stack-client vector_dbs register` -Create a new vector db -```bash -llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ] -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - -Optional arguments: -- `--provider-id`: Provider ID for the vector db -- `--provider-vector-db-id`: Provider's vector db ID -- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5` -- `--embedding-dimension`: Dimension of embeddings. Default: 768 - -### `llama-stack-client vector_dbs unregister` -Delete a vector db -```bash -llama-stack-client vector_dbs unregister -``` - - -Required arguments: -- `VECTOR_DB_ID`: Vector DB ID - - ## Shield Management Manage safety shield services. ### `llama-stack-client shields list` diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html index 98ed50c4f..d920317cf 100644 --- a/docs/static/deprecated-llama-stack-spec.html +++ b/docs/static/deprecated-llama-stack-spec.html @@ -5547,7 +5547,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -5798,7 +5798,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 99c8dd03e..66b2caeca 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -4114,7 +4114,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -4303,7 +4303,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index 7d572f89f..ab474180e 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -1850,7 +1850,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -3983,7 +3983,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index fee20814c..dd9e43cc5 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -1320,7 +1320,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -2927,7 +2927,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 1091a1cb6..61deaec1e 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -6800,7 +6800,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10205,7 +10205,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -10687,7 +10687,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11740,7 +11740,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 6c3702374..c6197b36f 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -5227,7 +5227,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -7919,7 +7919,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8227,7 +8227,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -8990,7 +8990,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html index ee0a265d3..38122ebc0 100644 --- a/docs/static/stainless-llama-stack-spec.html +++ b/docs/static/stainless-llama-stack-spec.html @@ -8472,7 +8472,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -11877,7 +11877,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -12359,7 +12359,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -13412,7 +13412,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -14959,7 +14959,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", @@ -16704,7 +16704,7 @@ "enum": [ "model", "shield", - "vector_db", + "vector_store", "dataset", "scoring_function", "benchmark", diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index eff01931f..93049a14a 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -6440,7 +6440,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9132,7 +9132,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -9440,7 +9440,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -10203,7 +10203,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -11325,7 +11325,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark @@ -12652,7 +12652,7 @@ components: enum: - model - shield - - vector_db + - vector_store - dataset - scoring_function - benchmark diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 5777f3d04..948ec615f 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta): models = "models" shields = "shields" - vector_dbs = "vector_dbs" # only used for routing + vector_stores = "vector_stores" # only used for routing table datasets = "datasets" scoring_functions = "scoring_functions" benchmarks = "benchmarks" diff --git a/llama_stack/apis/resource.py b/llama_stack/apis/resource.py index 7c4130f7d..dafdb28b0 100644 --- a/llama_stack/apis/resource.py +++ b/llama_stack/apis/resource.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, Field class ResourceType(StrEnum): model = "model" shield = "shield" - vector_db = "vector_db" + vector_store = "vector_store" dataset = "dataset" scoring_function = "scoring_function" benchmark = "benchmark" @@ -34,4 +34,4 @@ class Resource(BaseModel): provider_id: str = Field(description="ID of the provider that owns this resource") - type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)") + type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)") diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py deleted file mode 100644 index 0368095cb..000000000 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Literal, Protocol, runtime_checkable - -from pydantic import BaseModel - -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.schema_utils import json_schema_type - - -@json_schema_type -class VectorDB(Resource): - """Vector database resource for storing and querying vector embeddings. - - :param type: Type of resource, always 'vector_db' for vector databases - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - """ - - type: Literal[ResourceType.vector_db] = ResourceType.vector_db - - embedding_model: str - embedding_dimension: int - vector_db_name: str | None = None - - @property - def vector_db_id(self) -> str: - return self.identifier - - @property - def provider_vector_db_id(self) -> str | None: - return self.provider_resource_id - - -class VectorDBInput(BaseModel): - """Input parameters for creating or configuring a vector database. - - :param vector_db_id: Unique identifier for the vector database - :param embedding_model: Name of the embedding model to use for vector generation - :param embedding_dimension: Dimension of the embedding vectors - :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database - """ - - vector_db_id: str - embedding_model: str - embedding_dimension: int - provider_id: str | None = None - provider_vector_db_id: str | None = None - - -class ListVectorDBsResponse(BaseModel): - """Response from listing vector databases. - - :param data: List of vector databases - """ - - data: list[VectorDB] - - -@runtime_checkable -class VectorDBs(Protocol): - """Internal protocol for vector_dbs routing - no public API endpoints.""" - - async def list_vector_dbs(self) -> ListVectorDBsResponse: - """Internal method to list vector databases.""" - ... - - async def get_vector_db( - self, - vector_db_id: str, - ) -> VectorDB: - """Internal method to get a vector database by ID.""" - ... - - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> VectorDB: - """Internal method to register a vector database.""" - ... - - async def unregister_vector_db(self, vector_db_id: str) -> None: - """Internal method to unregister a vector database.""" - ... diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py index a309c47f9..49e4df039 100644 --- a/llama_stack/apis/vector_io/vector_io.py +++ b/llama_stack/apis/vector_io/vector_io.py @@ -15,7 +15,7 @@ from fastapi import Body from pydantic import BaseModel, Field from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id @@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel): total: int +# TODO: rename this as OpenAIVectorStore @json_schema_type class VectorStoreObject(BaseModel): """OpenAI Vector Store object. @@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all chunking_strategy: VectorStoreChunkingStrategy | None = None -class VectorDBStore(Protocol): - def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ... +class VectorStoreTable(Protocol): + def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ... @runtime_checkable @trace_protocol class VectorIO(Protocol): - vector_db_store: VectorDBStore | None = None + vector_store_table: VectorStoreTable | None = None # this will just block now until chunks are inserted, but it should # probably return a Job instance which can be polled for completion + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1) async def insert_chunks( self, @@ -546,6 +548,7 @@ class VectorIO(Protocol): """ ... + # TODO: rename vector_db_id to vector_store_id once Stainless is working @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1) async def query_chunks( self, diff --git a/llama_stack/apis/vector_dbs/__init__.py b/llama_stack/apis/vector_stores/__init__.py similarity index 87% rename from llama_stack/apis/vector_dbs/__init__.py rename to llama_stack/apis/vector_stores/__init__.py index af34ba9d4..8fc34058a 100644 --- a/llama_stack/apis/vector_dbs/__init__.py +++ b/llama_stack/apis/vector_stores/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .vector_dbs import * +from .vector_stores import * diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/llama_stack/apis/vector_stores/vector_stores.py new file mode 100644 index 000000000..524624028 --- /dev/null +++ b/llama_stack/apis/vector_stores/vector_stores.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Literal + +from pydantic import BaseModel + +from llama_stack.apis.resource import Resource, ResourceType + + +# Internal resource type for storing the vector store routing and other information +class VectorStore(Resource): + """Vector database resource for storing and querying vector embeddings. + + :param type: Type of resource, always 'vector_store' for vector stores + :param embedding_model: Name of the embedding model to use for vector generation + :param embedding_dimension: Dimension of the embedding vectors + """ + + type: Literal[ResourceType.vector_store] = ResourceType.vector_store + + embedding_model: str + embedding_dimension: int + vector_store_name: str | None = None + + @property + def vector_store_id(self) -> str: + return self.identifier + + @property + def provider_vector_store_id(self) -> str | None: + return self.provider_resource_id + + +class VectorStoreInput(BaseModel): + """Input parameters for creating or configuring a vector database. + + :param vector_store_id: Unique identifier for the vector store + :param embedding_model: Name of the embedding model to use for vector generation + :param embedding_dimension: Dimension of the embedding vectors + :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store + """ + + vector_store_id: str + embedding_model: str + embedding_dimension: int + provider_id: str | None = None + provider_vector_store_id: str | None = None diff --git a/llama_stack/core/access_control/datatypes.py b/llama_stack/core/access_control/datatypes.py index c833ed51b..84beb8e15 100644 --- a/llama_stack/core/access_control/datatypes.py +++ b/llama_stack/core/access_control/datatypes.py @@ -41,7 +41,7 @@ class AccessRule(BaseModel): A rule defines a list of action either to permit or to forbid. It may specify a principal or a resource that must match for the rule to take effect. The resource to match should be specified in the form of a type qualified identifier, e.g. - model::my-model or vector_db::some-db, or a wildcard for all resources of a type, + model::my-model or vector_store::some-db, or a wildcard for all resources of a type, e.g. model::*. If the principal or resource are not specified, they will match all requests. @@ -79,9 +79,9 @@ class AccessRule(BaseModel): description: any user has read access to any resource created by a member of their team - forbid: actions: [create, read, delete] - resource: vector_db::* + resource: vector_store::* unless: user with admin in roles - description: only user with admin role can use vector_db resources + description: only user with admin role can use vector_store resources """ diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index e8cb36a02..5f4775d87 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -23,8 +23,8 @@ from llama_stack.apis.scoring import Scoring from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput from llama_stack.apis.shields import Shield, ShieldInput from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner): pass -class VectorDBWithOwner(VectorDB, ResourceWithOwner): +class VectorStoreWithOwner(VectorStore, ResourceWithOwner): pass @@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner): pass -RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup +RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup RoutableObjectWithProvider = Annotated[ ModelWithOwner | ShieldWithOwner - | VectorDBWithOwner + | VectorStoreWithOwner | DatasetWithOwner | ScoringFnWithOwner | BenchmarkWithOwner @@ -427,7 +427,7 @@ class RegisteredResources(BaseModel): models: list[ModelInput] = Field(default_factory=list) shields: list[ShieldInput] = Field(default_factory=list) - vector_dbs: list[VectorDBInput] = Field(default_factory=list) + vector_stores: list[VectorStoreInput] = Field(default_factory=list) datasets: list[DatasetInput] = Field(default_factory=list) scoring_fns: list[ScoringFnInput] = Field(default_factory=list) benchmarks: list[BenchmarkInput] = Field(default_factory=list) diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 59461f5d6..82cbcf984 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]: router_api=Api.tool_runtime, ), AutoRoutedApiInfo( - routing_table_api=Api.vector_dbs, + routing_table_api=Api.vector_stores, router_api=Api.vector_io, ), ] diff --git a/llama_stack/core/resolver.py b/llama_stack/core/resolver.py index 6e1843870..0b63815ea 100644 --- a/llama_stack/core/resolver.py +++ b/llama_stack/core/resolver.py @@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions from llama_stack.apis.shields import Shields from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( @@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> Api.inspect: Inspect, Api.batches: Batches, Api.vector_io: VectorIO, - Api.vector_dbs: VectorDBs, + Api.vector_stores: VectorStore, Api.models: Models, Api.safety: Safety, Api.shields: Shields, diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index df4df0463..20c17e59d 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -29,7 +29,7 @@ async def get_routing_table_impl( from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable from ..routing_tables.shields import ShieldsRoutingTable from ..routing_tables.toolgroups import ToolGroupsRoutingTable - from ..routing_tables.vector_dbs import VectorDBsRoutingTable + from ..routing_tables.vector_stores import VectorStoresRoutingTable api_to_tables = { "models": ModelsRoutingTable, @@ -38,7 +38,7 @@ async def get_routing_table_impl( "scoring_functions": ScoringFunctionsRoutingTable, "benchmarks": BenchmarksRoutingTable, "tool_groups": ToolGroupsRoutingTable, - "vector_dbs": VectorDBsRoutingTable, + "vector_stores": VectorStoresRoutingTable, } if api.value not in api_to_tables: diff --git a/llama_stack/core/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py index ad82293e5..be4c13905 100644 --- a/llama_stack/core/routers/tool_runtime.py +++ b/llama_stack/core/routers/tool_runtime.py @@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime): async def query( self, content: InterleavedContent, - vector_db_ids: list[str], + vector_store_ids: list[str], query_config: RAGQueryConfig | None = None, ) -> RAGQueryResult: - logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}") + logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}") provider = await self.routing_table.get_provider_impl("knowledge_search") - return await provider.query(content, vector_db_ids, query_config) + return await provider.query(content, vector_store_ids, query_config) async def insert( self, documents: list[RAGDocument], - vector_db_id: str, + vector_store_id: str, chunk_size_in_tokens: int = 512, ) -> None: logger.debug( - f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" + f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}" ) provider = await self.routing_table.get_provider_impl("insert_into_memory") - return await provider.insert(documents, vector_db_id, chunk_size_in_tokens) + return await provider.insert(documents, vector_store_id, chunk_size_in_tokens) def __init__( self, diff --git a/llama_stack/core/routers/vector_io.py b/llama_stack/core/routers/vector_io.py index bfc5f7164..2b1701dc2 100644 --- a/llama_stack/core/routers/vector_io.py +++ b/llama_stack/core/routers/vector_io.py @@ -71,25 +71,6 @@ class VectorIORouter(VectorIO): raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model") - async def register_vector_db( - self, - vector_db_id: str, - embedding_model: str, - embedding_dimension: int | None = 384, - provider_id: str | None = None, - vector_db_name: str | None = None, - provider_vector_db_id: str | None = None, - ) -> None: - logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}") - await self.routing_table.register_vector_db( - vector_db_id, - embedding_model, - embedding_dimension, - provider_id, - vector_db_name, - provider_vector_db_id, - ) - async def insert_chunks( self, vector_db_id: str, @@ -165,22 +146,22 @@ class VectorIORouter(VectorIO): else: provider_id = list(self.routing_table.impls_by_provider_id.keys())[0] - vector_db_id = f"vs_{uuid.uuid4()}" - registered_vector_db = await self.routing_table.register_vector_db( - vector_db_id=vector_db_id, + vector_store_id = f"vs_{uuid.uuid4()}" + registered_vector_store = await self.routing_table.register_vector_store( + vector_store_id=vector_store_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension, provider_id=provider_id, - provider_vector_db_id=vector_db_id, - vector_db_name=params.name, + provider_vector_store_id=vector_store_id, + vector_store_name=params.name, ) - provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier) + provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier) - # Update model_extra with registered values so provider uses the already-registered vector_db + # Update model_extra with registered values so provider uses the already-registered vector_store if params.model_extra is None: params.model_extra = {} - params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id - params.model_extra["provider_id"] = registered_vector_db.provider_id + params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id + params.model_extra["provider_id"] = registered_vector_store.provider_id if embedding_model is not None: params.model_extra["embedding_model"] = embedding_model if embedding_dimension is not None: @@ -198,15 +179,15 @@ class VectorIORouter(VectorIO): logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}") # Route to default provider for now - could aggregate from all providers in the future # call retrieve on each vector dbs to get list of vector stores - vector_dbs = await self.routing_table.get_all_with_type("vector_db") + vector_stores = await self.routing_table.get_all_with_type("vector_store") all_stores = [] - for vector_db in vector_dbs: + for vector_store in vector_stores: try: - provider = await self.routing_table.get_provider_impl(vector_db.identifier) - vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier) + provider = await self.routing_table.get_provider_impl(vector_store.identifier) + vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier) all_stores.append(vector_store) except Exception as e: - logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}") + logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}") continue # Sort by created_at diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py index 087483bb6..d6faf93c5 100644 --- a/llama_stack/core/routing_tables/common.py +++ b/llama_stack/core/routing_tables/common.py @@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable elif api == Api.safety: return await p.register_shield(obj) elif api == Api.vector_io: - return await p.register_vector_db(obj) + return await p.register_vector_store(obj) elif api == Api.datasetio: return await p.register_dataset(obj) elif api == Api.scoring: @@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None: api = get_impl_api(p) if api == Api.vector_io: - return await p.unregister_vector_db(obj.identifier) + return await p.unregister_vector_store(obj.identifier) elif api == Api.inference: return await p.unregister_model(obj.identifier) elif api == Api.safety: @@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable): elif api == Api.safety: p.shield_store = self elif api == Api.vector_io: - p.vector_db_store = self + p.vector_store_store = self elif api == Api.datasetio: p.dataset_store = self elif api == Api.scoring: @@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable): from .scoring_functions import ScoringFunctionsRoutingTable from .shields import ShieldsRoutingTable from .toolgroups import ToolGroupsRoutingTable - from .vector_dbs import VectorDBsRoutingTable + from .vector_stores import VectorStoresRoutingTable def apiname_object(): if isinstance(self, ModelsRoutingTable): return ("Inference", "model") elif isinstance(self, ShieldsRoutingTable): return ("Safety", "shield") - elif isinstance(self, VectorDBsRoutingTable): - return ("VectorIO", "vector_db") + elif isinstance(self, VectorStoresRoutingTable): + return ("VectorIO", "vector_store") elif isinstance(self, DatasetsRoutingTable): return ("DatasetIO", "dataset") elif isinstance(self, ScoringFunctionsRoutingTable): diff --git a/llama_stack/core/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_stores.py similarity index 70% rename from llama_stack/core/routing_tables/vector_dbs.py rename to llama_stack/core/routing_tables/vector_stores.py index e87fb61c6..c6c80a01e 100644 --- a/llama_stack/core/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_stores.py @@ -6,15 +6,12 @@ from typing import Any -from pydantic import TypeAdapter - from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError from llama_stack.apis.models import ModelType from llama_stack.apis.resource import ResourceType -# Removed VectorDBs import to avoid exposing public API +# Removed VectorStores import to avoid exposing public API from llama_stack.apis.vector_io.vector_io import ( - OpenAICreateVectorStoreRequestWithExtraBody, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreSearchResponsePage, ) from llama_stack.core.datatypes import ( - VectorDBWithOwner, + VectorStoreWithOwner, ) from llama_stack.log import get_logger @@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model logger = get_logger(name=__name__, category="core::routing_tables") -class VectorDBsRoutingTable(CommonRoutingTableImpl): - """Internal routing table for vector_db operations. +class VectorStoresRoutingTable(CommonRoutingTableImpl): + """Internal routing table for vector_store operations. - Does not inherit from VectorDBs to avoid exposing public API endpoints. + Does not inherit from VectorStores to avoid exposing public API endpoints. Only provides internal routing functionality for VectorIORouter. """ # Internal methods only - no public API exposure - async def register_vector_db( + async def register_vector_store( self, - vector_db_id: str, + vector_store_id: str, embedding_model: str, embedding_dimension: int | None = 384, provider_id: str | None = None, - provider_vector_db_id: str | None = None, - vector_db_name: str | None = None, + provider_vector_store_id: str | None = None, + vector_store_name: str | None = None, ) -> Any: if provider_id is None: if len(self.impls_by_provider_id) > 0: @@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): raise ModelNotFoundError(embedding_model) if model.model_type != ModelType.embedding: raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding) - if "embedding_dimension" not in model.metadata: - raise ValueError(f"Model {embedding_model} does not have an embedding dimension") - try: - provider = self.impls_by_provider_id[provider_id] - except KeyError: - available_providers = list(self.impls_by_provider_id.keys()) - raise ValueError( - f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}" - ) from None - logger.warning( - "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly." - ) - request = OpenAICreateVectorStoreRequestWithExtraBody( - name=vector_db_name or vector_db_id, - embedding_model=embedding_model, - embedding_dimension=model.metadata["embedding_dimension"], + vector_store = VectorStoreWithOwner( + identifier=vector_store_id, + type=ResourceType.vector_store.value, provider_id=provider_id, - provider_vector_db_id=provider_vector_db_id, + provider_resource_id=provider_vector_store_id, + embedding_model=embedding_model, + embedding_dimension=embedding_dimension, + vector_store_name=vector_store_name, ) - vector_store = await provider.openai_create_vector_store(request) - - vector_store_id = vector_store.id - actual_provider_vector_db_id = provider_vector_db_id or vector_store_id - logger.warning( - f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name" - ) - - vector_db_data = { - "identifier": vector_store_id, - "type": ResourceType.vector_db.value, - "provider_id": provider_id, - "provider_resource_id": actual_provider_vector_db_id, - "embedding_model": embedding_model, - "embedding_dimension": model.metadata["embedding_dimension"], - "vector_db_name": vector_store.name, - } - vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data) - await self.register_object(vector_db) - return vector_db + await self.register_object(vector_store) + return vector_store async def openai_retrieve_vector_store( self, vector_store_id: str, ) -> VectorStoreObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store(vector_store_id) @@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): expires_after: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, ) -> VectorStoreDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) result = await provider.openai_delete_vector_store(vector_store_id) - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) return result - async def unregister_vector_db(self, vector_store_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Remove the vector store from the routing table registry.""" try: - vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id) - if vector_db_obj: - await self.unregister_object(vector_db_obj) + vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id) + if vector_store_obj: + await self.unregister_object(vector_store_obj) except Exception as e: # Log the error but don't fail the operation logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}") @@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): rewrite_query: bool | None = False, search_mode: str | None = "vector", ) -> VectorStoreSearchResponsePage: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_search_vector_store( vector_store_id=vector_store_id, @@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: VectorStoreChunkingStrategy | None = None, ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_attach_file_to_vector_store( vector_store_id=vector_store_id, @@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): before: str | None = None, filter: VectorStoreFileStatus | None = None, ) -> list[VectorStoreFileObject]: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_list_files_in_vector_store( vector_store_id=vector_store_id, @@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileObject: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file( vector_store_id=vector_store_id, @@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileContentsResponse: - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, @@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): file_id: str, attributes: dict[str, Any], ) -> VectorStoreFileObject: - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store_file( vector_store_id=vector_store_id, @@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): vector_store_id: str, file_id: str, ) -> VectorStoreFileDeleteResponse: - await self.assert_action_allowed("delete", "vector_db", vector_store_id) + await self.assert_action_allowed("delete", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_delete_vector_store_file( vector_store_id=vector_store_id, @@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): attributes: dict[str, Any] | None = None, chunking_strategy: Any | None = None, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_create_vector_store_file_batch( vector_store_id=vector_store_id, @@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_batch( batch_id=batch_id, @@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): limit: int | None = 20, order: str | None = "desc", ): - await self.assert_action_allowed("read", "vector_db", vector_store_id) + await self.assert_action_allowed("read", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_list_files_in_vector_store_file_batch( batch_id=batch_id, @@ -315,7 +284,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl): batch_id: str, vector_store_id: str, ): - await self.assert_action_allowed("update", "vector_db", vector_store_id) + await self.assert_action_allowed("update", "vector_store", vector_store_id) provider = await self.get_provider_impl(vector_store_id) return await provider.openai_cancel_vector_store_file_batch( batch_id=batch_id, diff --git a/llama_stack/core/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py index 4ee9d2204..16fd464ee 100644 --- a/llama_stack/core/ui/page/playground/tools.py +++ b/llama_stack/core/ui/page/playground/tools.py @@ -32,7 +32,7 @@ def tool_chat_page(): tool_groups_list = [tool_group.identifier for tool_group in tool_groups] mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")] builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")] - selected_vector_dbs = [] + selected_vector_stores = [] def reset_agent(): st.session_state.clear() @@ -55,13 +55,13 @@ def tool_chat_page(): ) if "builtin::rag" in toolgroup_selection: - vector_dbs = llama_stack_api.client.vector_dbs.list() or [] - if not vector_dbs: + vector_stores = llama_stack_api.client.vector_stores.list() or [] + if not vector_stores: st.info("No vector databases available for selection.") - vector_dbs = [vector_db.identifier for vector_db in vector_dbs] - selected_vector_dbs = st.multiselect( + vector_stores = [vector_store.identifier for vector_store in vector_stores] + selected_vector_stores = st.multiselect( label="Select Document Collections to use in RAG queries", - options=vector_dbs, + options=vector_stores, on_change=reset_agent, ) @@ -119,7 +119,7 @@ def tool_chat_page(): tool_dict = dict( name="builtin::rag", args={ - "vector_db_ids": list(selected_vector_dbs), + "vector_store_ids": list(selected_vector_stores), }, ) toolgroup_selection[i] = tool_dict diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index c8ff9cecb..9be3edb8e 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -17,7 +17,7 @@ from llama_stack.apis.models import Model from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.apis.shields import Shield from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_dbs import VectorDB +from llama_stack.apis.vector_stores import VectorStore from llama_stack.schema_utils import json_schema_type @@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol): async def unregister_shield(self, identifier: str) -> None: ... -class VectorDBsProtocolPrivate(Protocol): - async def register_vector_db(self, vector_db: VectorDB) -> None: ... +class VectorStoresProtocolPrivate(Protocol): + async def register_vector_store(self, vector_store: VectorStore) -> None: ... - async def unregister_vector_db(self, vector_db_id: str) -> None: ... + async def unregister_vector_store(self, vector_store_id: str) -> None: ... class DatasetsProtocolPrivate(Protocol): diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index f13eb3e96..5e33d4ca3 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -17,21 +17,21 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import FaissVectorIOConfig logger = get_logger(name=__name__, category="vector_io") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" @@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex): ) -class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} + self.cache: dict[str, VectorStoreWithIndex] = {} async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) # Load existing banks from kvstore start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, - await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, + await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr except Exception as e: return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}") - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) # Store in cache - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, - index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier), + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, + index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier), inference_api=self.inference_api, ) - async def list_vector_dbs(self) -> list[VectorDB]: - return [i.vector_db for i in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [i.vector_store for i in self.cache.values()] - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: assert self.kvstore is not None - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: index = self.cache.get(vector_db_id) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index cfe23bde5..37294f173 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -17,10 +17,10 @@ from numpy.typing import NDArray from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator @@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid" SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH} VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::" @@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex): await asyncio.to_thread(_delete_chunks) -class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): """ A VectorIO implementation using SQLite + sqlite_vec. - This class handles vector database registration (with metadata stored in a table named `vector_dbs`) - and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex). + This class handles vector database registration (with metadata stored in a table named `vector_stores`) + and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex). """ def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api - self.cache: dict[str, VectorDBWithIndex] = {} - self.vector_db_store = None + self.cache: dict[str, VectorStoreWithIndex] = {} + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) - for db_json in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(db_json) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) + for db_json in stored_vector_stores: + vector_store = VectorStore.model_validate_json(db_json) index = await SQLiteVecIndex.create( - vector_db.embedding_dimension, self.config.db_path, vector_db.identifier + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier ) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) # Load existing OpenAI vector stores into the in-memory cache await self.initialize_openai_vector_stores() @@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc # Clean up mixin resources (file batch tasks) await super().shutdown() - async def list_vector_dbs(self) -> list[VectorDB]: - return [v.vector_db for v in self.cache.values()] + async def list_vector_stores(self) -> list[VectorStore]: + return [v.vector_store for v in self.cache.values()] - async def register_vector_db(self, vector_db: VectorDB) -> None: - index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier) - self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api) + async def register_vector_store(self, vector_store: VectorStore) -> None: + index = await SQLiteVecIndex.create( + vector_store.embedding_dimension, self.config.db_path, vector_store.identifier + ) + self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, + index = VectorStoreWithIndex( + vector_store=vector_store, index=SQLiteVecIndex( - dimension=vector_db.embedding_dimension, + dimension=vector_store.embedding_dimension, db_path=self.config.db_path, - bank_id=vector_db.identifier, + bank_id=vector_store.identifier, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - logger.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) - # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api + # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api # and then call our index's add_chunks. await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a sqlite_vec index.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 0aa728c32..2663ad43e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -13,15 +13,15 @@ from numpy.typing import NDArray from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma") ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::" @@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Hybrid search is not supported in Chroma") -class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig, @@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.inference_api = inference_api self.client = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) - self.vector_db_store = self.kvstore + self.vector_store_table = self.kvstore if isinstance(self.config, RemoteChromaVectorIOConfig): log.info(f"Connecting to Chroma server at: {self.config.url}") @@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: collection = await maybe_await( self.client.get_or_create_collection( - name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()} + name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()} ) ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, ChromaIndex(self.client, collection), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, ChromaIndex(self.client, collection), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id not in self.cache: - log.warning(f"Vector DB {vector_db_id} not found") + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id not in self.cache: + log.warning(f"Vector DB {vector_store_id} not found") return - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") @@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if index is None: raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack") - collection = await maybe_await(self.client.get_collection(vector_db_id)) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack") + collection = await maybe_await(self.client.get_collection(vector_store_id)) if not collection: - raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") - index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api) - self.cache[vector_db_id] = index + raise ValueError(f"Vector DB {vector_store_id} not found in Chroma") + index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api) + self.cache[vector_store_id] = index return index async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Chroma vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index d7c34163d..cccf13816 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore @@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_WEIGHTED, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig logger = get_logger(name=__name__, category="vector_io::milvus") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::" @@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex): raise -class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig, @@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.cache = {} self.client = None self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: self.kvstore = await kvstore_impl(self.config.persistence) start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex( - vector_db, + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, index=MilvusIndex( client=self.client, - collection_name=vector_db.identifier, + collection_name=vector_store.identifier, consistency_level=self.config.consistency_level, kvstore=self.kvstore, ), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index if isinstance(self.config, RemoteMilvusVectorIOConfig): logger.info(f"Connecting to Milvus server at {self.config.uri}") self.client = MilvusClient(**self.config.model_dump(exclude_none=True)) @@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: if isinstance(self.config, RemoteMilvusVectorIOConfig): consistency_level = self.config.consistency_level else: consistency_level = "Strong" - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level), inference_api=self.inference_api, ) - self.cache[vector_db.identifier] = index + self.cache[vector_store.identifier] = index - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = VectorDBWithIndex( - vector_db=vector_db, - index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a milvus vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 703a47843..f28bd3cd9 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name from .config import PGVectorVectorIOConfig @@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig log = get_logger(name=__name__, category="vector_io::pgvector") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::" @@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex): def __init__( self, - vector_db: VectorDB, + vector_store: VectorStore, dimension: int, conn: psycopg2.extensions.connection, kvstore: KVStore | None = None, distance_metric: str = "COSINE", ): - self.vector_db = vector_db + self.vector_store = vector_store self.dimension = dimension self.conn = conn self.kvstore = kvstore @@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex): try: with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur: # Sanitize the table name by replacing hyphens with underscores - # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens + # SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens # when created with patterns like "test-vector-db-{uuid4()}" - sanitized_identifier = sanitize_collection_name(self.vector_db.identifier) + sanitized_identifier = sanitize_collection_name(self.vector_store.identifier) self.table_name = f"vs_{sanitized_identifier}" cur.execute( @@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex): """ ) except Exception as e: - log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") - raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e + log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") + raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray): assert len(chunks) == len(embeddings), ( @@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex): ) -class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None ) -> None: @@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco self.inference_api = inference_api self.conn = None self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" async def initialize(self) -> None: @@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: # Persist vector DB metadata in the KV store assert self.kvstore is not None # Upsert model metadata in Postgres - upsert_models(self.conn, [(vector_db.identifier, vector_db)]) + upsert_models(self.conn, [(vector_store.identifier, vector_store)]) # Create and cache the PGVector index table for the vector DB pgvector_index = PGVectorIndex( - vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore + vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore ) await pgvector_index.initialize() - index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api) - self.cache[vector_db.identifier] = index + index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api) + self.cache[vector_store.identifier] = index - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: # Remove provider index and cache - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] # Delete vector DB metadata from KV store assert self.kvstore is not None - await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}") + await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}") async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) await index.insert_chunks(chunks) async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) return await index.query_chunks(query, params) - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) - index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn) + index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn) await index.initialize() - self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api) - return self.cache[vector_db_id] + self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api) + return self.cache[vector_store_id] async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete a chunk from a PostgreSQL vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise VectorStoreNotFoundError(store_id) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 6838d69e9..93d0894a6 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, QueryChunksResponse, @@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategy, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig @@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id" # KV store prefixes for vector databases VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::" def convert_id(_id: str) -> str: @@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex): await self.client.delete_collection(collection_name=self.collection_name) -class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): +class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate): def __init__( self, config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig, @@ -157,7 +157,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api - self.vector_db_store = None + self.vector_store_table = None self._qdrant_lock = asyncio.Lock() async def initialize(self) -> None: @@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP start_key = VECTOR_DBS_PREFIX end_key = f"{VECTOR_DBS_PREFIX}\xff" - stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key) + stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key) - for vector_db_data in stored_vector_dbs: - vector_db = VectorDB.model_validate_json(vector_db_data) - index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api) - self.cache[vector_db.identifier] = index + for vector_store_data in stored_vector_stores: + vector_store = VectorStore.model_validate_json(vector_store_data) + index = VectorStoreWithIndex( + vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api + ) + self.cache[vector_store.identifier] = index self.openai_vector_stores = await self._load_openai_vector_stores() async def shutdown(self) -> None: @@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: assert self.kvstore is not None - key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}" - await self.kvstore.set(key=key, value=vector_db.model_dump_json()) + key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}" + await self.kvstore.set(key=key, value=vector_store.model_dump_json()) - index = VectorDBWithIndex( - vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api - ) - - self.cache[vector_db.identifier] = index - - async def unregister_vector_db(self, vector_db_id: str) -> None: - if vector_db_id in self.cache: - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] - - assert self.kvstore is not None - await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}") - - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] - - if self.vector_db_store is None: - raise ValueError(f"Vector DB not found {vector_db_id}") - - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) - - index = VectorDBWithIndex( - vector_db=vector_db, - index=QdrantIndex(client=self.client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(self.client, vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + + self.cache[vector_store.identifier] = index + + async def unregister_vector_store(self, vector_store_id: str) -> None: + if vector_store_id in self.cache: + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] + + assert self.kvstore is not None + await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}") + + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] + + if self.vector_store_table is None: + raise ValueError(f"Vector DB not found {vector_store_id}") + + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) + + index = VectorStoreWithIndex( + vector_store=vector_store, + index=QdrantIndex(client=self.client, collection_name=vector_store.identifier), + inference_api=self.inference_api, + ) + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: """Delete chunks from a Qdrant vector store.""" - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 8e7eb7267..66922aa3f 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorDBsProtocolPrivate +from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin @@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import ( RERANKER_TYPE_RRF, ChunkForDeletion, EmbeddingIndex, - VectorDBWithIndex, + VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name @@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig log = get_logger(name=__name__, category="vector_io::weaviate") VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::" @@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex): return QueryChunksResponse(chunks=chunks, scores=scores) -class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): +class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate): def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: super().__init__(files_api=files_api, kvstore=None) self.config = config self.inference_api = inference_api self.client_cache = {} self.cache = {} - self.vector_db_store = None + self.vector_store_table = None self.metadata_collection_name = "openai_vector_stores_metadata" def _get_client(self) -> weaviate.WeaviateClient: @@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv end_key = f"{VECTOR_DBS_PREFIX}\xff" stored = await self.kvstore.values_in_range(start_key, end_key) for raw in stored: - vector_db = VectorDB.model_validate_json(raw) + vector_store = VectorStore.model_validate_json(raw) client = self._get_client() - idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db=vector_db, index=idx, inference_api=self.inference_api + idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore) + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store=vector_store, index=idx, inference_api=self.inference_api ) # Load OpenAI vector stores metadata into cache @@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv # Clean up mixin resources (file batch tasks) await super().shutdown() - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) # Create collection if it doesn't exist if not client.collections.exists(sanitized_collection_name): client.collections.create( @@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv ], ) - self.cache[vector_db.identifier] = VectorDBWithIndex( - vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api + self.cache[vector_store.identifier] = VectorStoreWithIndex( + vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api ) - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True) - if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: + sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True) + if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False: return client.collections.delete(sanitized_collection_name) - await self.cache[vector_db_id].index.delete() - del self.cache[vector_db_id] + await self.cache[vector_store_id].index.delete() + del self.cache[vector_store_id] - async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None: - if vector_db_id in self.cache: - return self.cache[vector_db_id] + async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None: + if vector_store_id in self.cache: + return self.cache[vector_store_id] - if self.vector_db_store is None: - raise VectorStoreNotFoundError(vector_db_id) + if self.vector_store_table is None: + raise VectorStoreNotFoundError(vector_store_id) - vector_db = await self.vector_db_store.get_vector_db(vector_db_id) - if not vector_db: - raise VectorStoreNotFoundError(vector_db_id) + vector_store = await self.vector_store_table.get_vector_store(vector_store_id) + if not vector_store: + raise VectorStoreNotFoundError(vector_store_id) client = self._get_client() - sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True) + sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True) if not client.collections.exists(sanitized_collection_name): raise ValueError(f"Collection with name `{sanitized_collection_name}` not found") - index = VectorDBWithIndex( - vector_db=vector_db, - index=WeaviateIndex(client=client, collection_name=vector_db.identifier), + index = VectorStoreWithIndex( + vector_store=vector_store, + index=WeaviateIndex(client=client, collection_name=vector_store.identifier), inference_api=self.inference_api, ) - self.cache[vector_db_id] = index + self.cache[vector_store_id] = index return index async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) @@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv async def query_chunks( self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None ) -> QueryChunksResponse: - index = await self._get_and_cache_vector_db_index(vector_db_id) + index = await self._get_and_cache_vector_store_index(vector_db_id) if not index: raise VectorStoreNotFoundError(vector_db_id) return await index.query_chunks(query, params) async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None: - index = await self._get_and_cache_vector_db_index(store_id) + index = await self._get_and_cache_vector_store_index(store_id) if not index: raise ValueError(f"Vector DB {store_id} not found") diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 7806d98c1..6629fb965 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -17,7 +17,6 @@ from pydantic import TypeAdapter from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore @@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size VERSION = "v3" -VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" +VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::" OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::" @@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC): pass @abstractmethod - async def register_vector_db(self, vector_db: VectorDB) -> None: + async def register_vector_store(self, vector_store: VectorStore) -> None: """Register a vector database (provider-specific implementation).""" pass @abstractmethod - async def unregister_vector_db(self, vector_db_id: str) -> None: + async def unregister_vector_store(self, vector_store_id: str) -> None: """Unregister a vector database (provider-specific implementation).""" pass @@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC): extra_body = params.model_extra or {} metadata = params.metadata or {} - provider_vector_db_id = extra_body.get("provider_vector_db_id") + provider_vector_store_id = extra_body.get("provider_vector_store_id") # Use embedding info from metadata if available, otherwise from extra_body if metadata.get("embedding_model"): @@ -389,8 +389,8 @@ class OpenAIVectorStoreMixin(ABC): # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None) - # Derive the canonical vector_db_id (allow override, else generate) - vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") + # Derive the canonical vector_store_id (allow override, else generate) + vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}") if embedding_model is None: raise ValueError("embedding_model is required") @@ -398,19 +398,20 @@ class OpenAIVectorStoreMixin(ABC): if embedding_dimension is None: raise ValueError("Embedding dimension is required") - # Register the VectorDB backing this vector store + # Register the VectorStore backing this vector store if provider_id is None: raise ValueError("Provider ID is required but was not provided") - vector_db = VectorDB( - identifier=vector_db_id, + # call to the provider to create any index, etc. + vector_store = VectorStore( + identifier=vector_store_id, embedding_dimension=embedding_dimension, embedding_model=embedding_model, provider_id=provider_id, - provider_resource_id=vector_db_id, - vector_db_name=params.name, + provider_resource_id=vector_store_id, + vector_store_name=params.name, ) - await self.register_vector_db(vector_db) + await self.register_vector_store(vector_store) # Create OpenAI vector store metadata status = "completed" @@ -424,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC): total=0, ) store_info: dict[str, Any] = { - "id": vector_db_id, + "id": vector_store_id, "object": "vector_store", "created_at": created_at, "name": params.name, @@ -441,23 +442,23 @@ class OpenAIVectorStoreMixin(ABC): # Add provider information to metadata if provided if provider_id: metadata["provider_id"] = provider_id - if provider_vector_db_id: - metadata["provider_vector_db_id"] = provider_vector_db_id + if provider_vector_store_id: + metadata["provider_vector_store_id"] = provider_vector_store_id store_info["metadata"] = metadata # Save to persistent storage (provider-specific) - await self._save_openai_vector_store(vector_db_id, store_info) + await self._save_openai_vector_store(vector_store_id, store_info) # Store in memory cache - self.openai_vector_stores[vector_db_id] = store_info + self.openai_vector_stores[vector_store_id] = store_info # Now that our vector store is created, attach any files that were provided file_ids = params.file_ids or [] - tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids] + tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids] await asyncio.gather(*tasks) # Get the updated store info and return it - store_info = self.openai_vector_stores[vector_db_id] + store_info = self.openai_vector_stores[vector_store_id] return VectorStoreObject.model_validate(store_info) async def openai_list_vector_stores( @@ -567,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC): # Also delete the underlying vector DB try: - await self.unregister_vector_db(vector_store_id) + await self.unregister_vector_store(vector_store_id) except Exception as e: logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 0375ecaaa..6c8746e92 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -23,8 +23,8 @@ from llama_stack.apis.common.content_types import ( ) from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.providers.datatypes import Api @@ -187,7 +187,7 @@ def make_overlapped_chunks( updated_timestamp=int(time.time()), chunk_window=chunk_window, chunk_tokenizer=default_tokenizer, - chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks` + chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks` content_token_count=len(toks), metadata_token_count=len(metadata_tokens), ) @@ -255,8 +255,8 @@ class EmbeddingIndex(ABC): @dataclass -class VectorDBWithIndex: - vector_db: VectorDB +class VectorStoreWithIndex: + vector_store: VectorStore index: EmbeddingIndex inference_api: Api.inference @@ -269,14 +269,14 @@ class VectorDBWithIndex: if c.embedding is None: chunks_to_embed.append(c) if c.chunk_metadata: - c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model - c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension + c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model + c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension else: - _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension) + _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension) if chunks_to_embed: params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[c.content for c in chunks_to_embed], ) resp = await self.inference_api.openai_embeddings(params) @@ -319,7 +319,7 @@ class VectorDBWithIndex: return await self.index.query_keyword(query_string, k, score_threshold) params = OpenAIEmbeddingsRequestWithExtraBody( - model=self.vector_db.embedding_model, + model=self.vector_store.embedding_model, input=[query_string], ) embeddings_response = await self.inference_api.openai_embeddings(params) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a258eb1a0..041d10f10 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -37,6 +37,9 @@ def pytest_sessionstart(session): if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ: os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay" + if "LLAMA_STACK_LOGGING" not in os.environ: + os.environ["LLAMA_STACK_LOGGING"] = "all=warning" + if "SQLITE_STORE_DIR" not in os.environ: os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp() diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index e5ca7a0db..1f67ddb24 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -49,46 +49,50 @@ def client_with_empty_registry(client_with_models): @vector_provider_wrapper -def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_retrieve( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id # Retrieve the vector store and validate its properties - response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id) + response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id) assert response is not None - assert response.id == actual_vector_db_id - assert response.name == vector_db_name + assert response.id == actual_vector_store_id + assert response.name == vector_store_name assert response.id.startswith("vs_") @vector_provider_wrapper -def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id): - vector_db_name = "test_vector_db" +def test_vector_store_register( + client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id +): + vector_store_name = "test_vector_store" response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = response.id - assert actual_vector_db_id.startswith("vs_") - assert actual_vector_db_id != vector_db_name + actual_vector_store_id = response.id + assert actual_vector_store_id.startswith("vs_") + assert actual_vector_store_id != vector_store_name vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 1 vector_store = vector_stores.data[0] - assert vector_store.id == actual_vector_db_id - assert vector_store.name == vector_db_name + assert vector_store.id == actual_vector_store_id + assert vector_store.name == vector_store_name - client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id) + client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id) vector_stores = client_with_empty_registry.vector_stores.list() assert len(vector_stores.data) == 0 @@ -108,23 +112,23 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe def test_insert_chunks( client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id ): - vector_db_name = "test_vector_db" + vector_store_name = "test_vector_store" create_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = create_response.id + actual_vector_store_id = create_response.id client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=sample_chunks, ) response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="What is the capital of France?", ) assert response is not None @@ -133,7 +137,7 @@ def test_insert_chunks( query, expected_doc_id = test_case response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query=query, ) assert response is not None @@ -151,15 +155,15 @@ def test_insert_chunks_with_precomputed_embeddings( "inline::qdrant": {"score_threshold": -1.0}, "remote::qdrant": {"score_threshold": -1.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -170,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="precomputed embedding test", params=vector_io_provider_params_dict.get(provider, None), ) @@ -200,16 +204,16 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( "remote::qdrant": {"score_threshold": 0.0}, "inline::qdrant": {"score_threshold": 0.0}, } - vector_db_name = "test_precomputed_embeddings_db" + vector_store_name = "test_precomputed_embeddings_db" register_response = client_with_empty_registry.vector_stores.create( - name=vector_db_name, + name=vector_store_name, extra_body={ "embedding_model": embedding_model_id, "provider_id": vector_io_provider_id, }, ) - actual_vector_db_id = register_response.id + actual_vector_store_id = register_response.id chunks_with_embeddings = [ Chunk( @@ -220,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb( ] client_with_empty_registry.vector_io.insert( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, chunks=chunks_with_embeddings, ) provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0] response = client_with_empty_registry.vector_io.query( - vector_db_id=actual_vector_db_id, + vector_db_id=actual_vector_store_id, query="duplicate", params=vector_io_provider_params_dict.get(provider, None), ) diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index 997df0d78..dd3246cb3 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -21,7 +21,7 @@ async def test_single_provider_auto_selection(): Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384}) ] ) - mock_routing_table.register_vector_db = AsyncMock( + mock_routing_table.register_vector_store = AsyncMock( return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123") ) mock_routing_table.get_provider_impl = AsyncMock( diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py index c78596018..2951ca2e5 100644 --- a/tests/unit/providers/vector_io/conftest.py +++ b/tests/unit/providers/vector_io/conftest.py @@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter @@ -31,7 +31,7 @@ def vector_provider(request): @pytest.fixture -def vector_db_id() -> str: +def vector_store_id() -> str: return f"test-vector-db-{random.randint(1, 100)}" @@ -149,8 +149,8 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf ) collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}" await adapter.initialize() - await adapter.register_vector_db( - VectorDB( + await adapter.register_vector_store( + VectorStore( identifier=collection_id, provider_id="test_provider", embedding_model="test_model", @@ -186,8 +186,8 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding files_api=None, ) await adapter.initialize() - await adapter.register_vector_db( - VectorDB( + await adapter.register_vector_store( + VectorStore( identifier=f"faiss_test_collection_{np.random.randint(1e6)}", provider_id="test_provider", embedding_model="test_model", @@ -215,7 +215,7 @@ def mock_psycopg2_connection(): async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection): connection, cursor = mock_psycopg2_connection - vector_db = VectorDB( + vector_store = VectorStore( identifier="test-vector-db", embedding_model="test-model", embedding_dimension=embedding_dimension, @@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection): with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"): with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"): - index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE") + index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE") index._test_chunks = [] original_add_chunks = index.add_chunks @@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd await adapter.initialize() adapter.conn = mock_conn - async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None): - index = await adapter._get_and_cache_vector_db_index(vector_db_id) + async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None): + index = await adapter._get_and_cache_vector_store_index(vector_store_id) if not index: - raise ValueError(f"Vector DB {vector_db_id} not found") + raise ValueError(f"Vector DB {vector_store_id} not found") await index.insert_chunks(chunks) adapter.insert_chunks = mock_insert_chunks - async def mock_query_chunks(vector_db_id, query, params=None): - index = await adapter._get_and_cache_vector_db_index(vector_db_id) + async def mock_query_chunks(vector_store_id, query, params=None): + index = await adapter._get_and_cache_vector_store_index(vector_store_id) if not index: - raise ValueError(f"Vector DB {vector_db_id} not found") + raise ValueError(f"Vector DB {vector_store_id} not found") return await index.query_chunks(query, params) adapter.query_chunks = mock_query_chunks - test_vector_db = VectorDB( + test_vector_store = VectorStore( identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}", provider_id="test_provider", embedding_model="test_model", embedding_dimension=embedding_dimension, ) - await adapter.register_vector_db(test_vector_db) - adapter.test_collection_id = test_vector_db.identifier + await adapter.register_vector_store(test_vector_store) + adapter.test_collection_id = test_vector_store.identifier yield adapter await adapter.shutdown() diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index fa5c5f56b..7b870d16e 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -11,8 +11,8 @@ import numpy as np import pytest from llama_stack.apis.files import Files -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse +from llama_stack.apis.vector_stores import VectorStore from llama_stack.providers.datatypes import HealthStatus from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.inline.vector_io.faiss.faiss import ( @@ -43,8 +43,8 @@ def embedding_dimension(): @pytest.fixture -def vector_db_id(): - return "test_vector_db" +def vector_store_id(): + return "test_vector_store" @pytest.fixture @@ -61,12 +61,12 @@ def sample_embeddings(embedding_dimension): @pytest.fixture -def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock: - mock_vector_db = MagicMock(spec=VectorDB) - mock_vector_db.embedding_model = "mock_embedding_model" - mock_vector_db.identifier = vector_db_id - mock_vector_db.embedding_dimension = embedding_dimension - return mock_vector_db +def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock: + mock_vector_store = MagicMock(spec=VectorStore) + mock_vector_store.embedding_model = "mock_embedding_model" + mock_vector_store.identifier = vector_store_id + mock_vector_store.embedding_dimension = embedding_dimension + return mock_vector_store @pytest.fixture diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index ad55b9336..65d7b7602 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -12,7 +12,6 @@ import numpy as np import pytest from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import ( Chunk, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, @@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import ( VectorStoreChunkingStrategyAuto, VectorStoreFileObject, ) +from llama_stack.apis.vector_stores import VectorStore from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX # This test is a unit test for the inline VectorIO providers. This should only contain @@ -71,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): key = f"{VECTOR_DBS_PREFIX}db1" - dummy = VectorDB( + dummy = VectorStore( identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump())) @@ -81,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.initialize() - dummy = VectorDB( + dummy = VectorStore( identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) - await vector_io_adapter.register_vector_db(dummy) + await vector_io_adapter.register_vector_store(dummy) await vector_io_adapter.shutdown() await vector_io_adapter.initialize() @@ -92,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.shutdown() -async def test_register_and_unregister_vector_db(vector_io_adapter): +async def test_register_and_unregister_vector_store(vector_io_adapter): unique_id = f"foo_db_{np.random.randint(1e6)}" - dummy = VectorDB( + dummy = VectorStore( identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128 ) - await vector_io_adapter.register_vector_db(dummy) + await vector_io_adapter.register_vector_store(dummy) assert dummy.identifier in vector_io_adapter.cache - await vector_io_adapter.unregister_vector_db(dummy.identifier) + await vector_io_adapter.unregister_vector_store(dummy.identifier) assert dummy.identifier not in vector_io_adapter.cache @@ -121,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter): async def test_insert_chunks_missing_db_raises(vector_io_adapter): - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) + vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None) with pytest.raises(ValueError): await vector_io_adapter.insert_chunks("db_not_exist", []) @@ -170,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter async def test_query_chunks_missing_db_raises(vector_io_adapter): - vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) + vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None) with pytest.raises(ValueError): await vector_io_adapter.query_chunks("db_missing", "q", None) @@ -182,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -198,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -214,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -229,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter): "id": store_id, "name": "Test Store", "description": "A test OpenAI vector store", - "vector_db_id": "test_db", + "vector_store_id": "test_db", "embedding_model": "test_model", } @@ -998,8 +998,8 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter): async def test_embedding_config_from_metadata(vector_io_adapter): """Test that embedding configuration is correctly extracted from metadata.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1015,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) - # Verify VectorDB was registered with correct embedding config from metadata - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + # Verify VectorStore was registered with correct embedding config from metadata + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "test-embedding-model" assert call_args.embedding_dimension == 512 @@ -1025,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter): async def test_embedding_config_from_extra_body(vector_io_adapter): """Test that embedding configuration is correctly extracted from extra_body when metadata is empty.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1042,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) - # Verify VectorDB was registered with correct embedding config from extra_body - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + # Verify VectorStore was registered with correct embedding config from extra_body + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "extra-body-model" assert call_args.embedding_dimension == 1024 @@ -1052,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter): async def test_embedding_config_consistency_check_passes(vector_io_adapter): """Test that consistent embedding config in both metadata and extra_body passes validation.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1073,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Should not raise any error and use metadata config - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "consistent-model" assert call_args.embedding_dimension == 768 @@ -1082,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter): async def test_embedding_config_inconsistency_errors(vector_io_adapter): """Test that inconsistent embedding config between metadata and extra_body raises errors.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1104,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Reset mock for second test - vector_io_adapter.register_vector_db.reset_mock() + vector_io_adapter.register_vector_store.reset_mock() # Test with inconsistent embedding dimension params = OpenAICreateVectorStoreRequestWithExtraBody( @@ -1126,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter): async def test_embedding_config_defaults_when_missing(vector_io_adapter): """Test that embedding dimension defaults to 768 when not provided.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" @@ -1143,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter): await vector_io_adapter.openai_create_vector_store(params) # Should default to 768 dimensions - vector_io_adapter.register_vector_db.assert_called_once() - call_args = vector_io_adapter.register_vector_db.call_args[0][0] + vector_io_adapter.register_vector_store.assert_called_once() + call_args = vector_io_adapter.register_vector_store.call_args[0][0] assert call_args.embedding_model == "model-without-dimension" assert call_args.embedding_dimension == 768 @@ -1152,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter): async def test_embedding_config_required_model_missing(vector_io_adapter): """Test that missing embedding model raises error.""" - # Mock register_vector_db to avoid actual registration - vector_io_adapter.register_vector_db = AsyncMock() + # Mock register_vector_store to avoid actual registration + vector_io_adapter.register_vector_store = AsyncMock() # Set provider_id attribute for the adapter vector_io_adapter.__provider_id__ = "test_provider" # Mock the default model lookup to return None (no default model available) diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index a45b66f02..c012bc4f0 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -18,7 +18,7 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti class TestRagQuery: - async def test_query_raises_on_empty_vector_db_ids(self): + async def test_query_raises_on_empty_vector_store_ids(self): rag_tool = MemoryToolRuntimeImpl( config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock() ) @@ -82,7 +82,7 @@ class TestRagQuery: with pytest.raises(ValueError): RAGQueryConfig(mode="wrong_mode") - async def test_query_adds_vector_db_id_to_chunk_metadata(self): + async def test_query_adds_vector_store_id_to_chunk_metadata(self): rag_tool = MemoryToolRuntimeImpl( config=MagicMock(), vector_io_api=MagicMock(), diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 1e40c98e8..200da5c26 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -21,7 +21,7 @@ from llama_stack.apis.tools import RAGDocument from llama_stack.apis.vector_io import Chunk from llama_stack.providers.utils.memory.vector_store import ( URL, - VectorDBWithIndex, + VectorStoreWithIndex, _validate_embedding, content_from_doc, make_overlapped_chunks, @@ -206,15 +206,15 @@ class TestVectorStore: assert str(excinfo.value.__cause__) == "Cannot convert to string" -class TestVectorDBWithIndex: +class TestVectorStoreWithIndex: async def test_insert_chunks_without_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model without embeddings" + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model without embeddings" mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -227,7 +227,7 @@ class TestVectorDBWithIndex: OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) # Verify openai_embeddings was called with correct params mock_inference_api.openai_embeddings.assert_called_once() @@ -243,14 +243,14 @@ class TestVectorDBWithIndex: assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) async def test_insert_chunks_with_valid_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model with embeddings" - mock_vector_db.embedding_dimension = 3 + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model with embeddings" + mock_vector_store.embedding_dimension = 3 mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -258,7 +258,7 @@ class TestVectorDBWithIndex: Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) mock_inference_api.openai_embeddings.assert_not_called() mock_index.add_chunks.assert_called_once() @@ -267,14 +267,14 @@ class TestVectorDBWithIndex: assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) async def test_insert_chunks_with_invalid_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_dimension = 3 - mock_vector_db.embedding_model = "test-model with invalid embeddings" + mock_vector_store = MagicMock() + mock_vector_store.embedding_dimension = 3 + mock_vector_store.embedding_model = "test-model with invalid embeddings" mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) # Verify Chunk raises ValueError for invalid embedding type @@ -283,7 +283,7 @@ class TestVectorDBWithIndex: # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called) with pytest.raises(ValueError, match="Input should be a valid list"): - await vector_db_with_index.insert_chunks( + await vector_store_with_index.insert_chunks( [ Chunk(content="Test 1", embedding=None, metadata={}), Chunk(content="Test 2", embedding="invalid_type", metadata={}), @@ -292,7 +292,7 @@ class TestVectorDBWithIndex: # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called) with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "): - await vector_db_with_index.insert_chunks( + await vector_store_with_index.insert_chunks( Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={}) ) @@ -300,20 +300,20 @@ class TestVectorDBWithIndex: Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}), ] with pytest.raises(ValueError, match="has dimension 4, expected 3"): - await vector_db_with_index.insert_chunks(chunks_wrong_dim) + await vector_store_with_index.insert_chunks(chunks_wrong_dim) mock_inference_api.openai_embeddings.assert_not_called() mock_index.add_chunks.assert_not_called() async def test_insert_chunks_with_partially_precomputed_embeddings(self): - mock_vector_db = MagicMock() - mock_vector_db.embedding_model = "test-model with partial embeddings" - mock_vector_db.embedding_dimension = 3 + mock_vector_store = MagicMock() + mock_vector_store.embedding_model = "test-model with partial embeddings" + mock_vector_store.embedding_dimension = 3 mock_index = AsyncMock() mock_inference_api = AsyncMock() - vector_db_with_index = VectorDBWithIndex( - vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api + vector_store_with_index = VectorStoreWithIndex( + vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api ) chunks = [ @@ -327,7 +327,7 @@ class TestVectorDBWithIndex: OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1), ] - await vector_db_with_index.insert_chunks(chunks) + await vector_store_with_index.insert_chunks(chunks) # Verify openai_embeddings was called with correct params mock_inference_api.openai_embeddings.assert_called_once() diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 95022ad33..d4c9786d1 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -8,8 +8,8 @@ import pytest from llama_stack.apis.inference import Model -from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.core.datatypes import VectorDBWithOwner +from llama_stack.apis.vector_stores import VectorStore +from llama_stack.core.datatypes import VectorStoreWithOwner from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig from llama_stack.core.store.registry import ( KEY_FORMAT, @@ -20,12 +20,12 @@ from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_b @pytest.fixture -def sample_vector_db(): - return VectorDB( - identifier="test_vector_db", +def sample_vector_store(): + return VectorStore( + identifier="test_vector_store", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) @@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry): assert result is None -async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model): - print(f"Registering {sample_vector_db}") - await disk_dist_registry.register(sample_vector_db) +async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model): + print(f"Registering {sample_vector_store}") + await disk_dist_registry.register(sample_vector_store) print(f"Registering {sample_model}") await disk_dist_registry.register(sample_model) - print("Getting vector_db") - result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db") - assert result_vector_db is not None - assert result_vector_db.identifier == sample_vector_db.identifier - assert result_vector_db.embedding_model == sample_vector_db.embedding_model - assert result_vector_db.provider_id == sample_vector_db.provider_id + print("Getting vector_store") + result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store") + assert result_vector_store is not None + assert result_vector_store.identifier == sample_vector_store.identifier + assert result_vector_store.embedding_model == sample_vector_store.embedding_model + assert result_vector_store.provider_id == sample_vector_store.provider_id result_model = await disk_dist_registry.get("model", "test_model") assert result_model is not None @@ -63,11 +63,11 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m assert result_model.provider_id == sample_model.provider_id -async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model): +async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model): # First populate the disk registry disk_registry = DiskDistributionRegistry(sqlite_kvstore) await disk_registry.initialize() - await disk_registry.register(sample_vector_db) + await disk_registry.register(sample_vector_store) await disk_registry.register(sample_model) # Test cached version loads from disk @@ -79,29 +79,29 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, ) await cached_registry.initialize() - result_vector_db = await cached_registry.get("vector_db", "test_vector_db") - assert result_vector_db is not None - assert result_vector_db.identifier == sample_vector_db.identifier - assert result_vector_db.embedding_model == sample_vector_db.embedding_model - assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension - assert result_vector_db.provider_id == sample_vector_db.provider_id + result_vector_store = await cached_registry.get("vector_store", "test_vector_store") + assert result_vector_store is not None + assert result_vector_store.identifier == sample_vector_store.identifier + assert result_vector_store.embedding_model == sample_vector_store.embedding_model + assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension + assert result_vector_store.provider_id == sample_vector_store.provider_id async def test_cached_registry_updates(cached_disk_dist_registry): - new_vector_db = VectorDB( - identifier="test_vector_db_2", + new_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", ) - await cached_disk_dist_registry.register(new_vector_db) + await cached_disk_dist_registry.register(new_vector_store) # Verify in cache - result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") - assert result_vector_db is not None - assert result_vector_db.identifier == new_vector_db.identifier - assert result_vector_db.provider_id == new_vector_db.provider_id + result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2") + assert result_vector_store is not None + assert result_vector_store.identifier == new_vector_store.identifier + assert result_vector_store.provider_id == new_vector_store.provider_id # Verify persisted to disk db_path = cached_disk_dist_registry.kvstore.db_path @@ -111,87 +111,89 @@ async def test_cached_registry_updates(cached_disk_dist_registry): await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry")) ) await new_registry.initialize() - result_vector_db = await new_registry.get("vector_db", "test_vector_db_2") - assert result_vector_db is not None - assert result_vector_db.identifier == new_vector_db.identifier - assert result_vector_db.provider_id == new_vector_db.provider_id + result_vector_store = await new_registry.get("vector_store", "test_vector_store_2") + assert result_vector_store is not None + assert result_vector_store.identifier == new_vector_store.identifier + assert result_vector_store.provider_id == new_vector_store.provider_id async def test_duplicate_provider_registration(cached_disk_dist_registry): - original_vector_db = VectorDB( - identifier="test_vector_db_2", + original_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", ) - assert await cached_disk_dist_registry.register(original_vector_db) + assert await cached_disk_dist_registry.register(original_vector_store) - duplicate_vector_db = VectorDB( - identifier="test_vector_db_2", + duplicate_vector_store = VectorStore( + identifier="test_vector_store_2", embedding_model="different-model", embedding_dimension=768, - provider_resource_id="test_vector_db_2", + provider_resource_id="test_vector_store_2", provider_id="baz", # Same provider_id ) - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"): - await cached_disk_dist_registry.register(duplicate_vector_db) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists" + ): + await cached_disk_dist_registry.register(duplicate_vector_store) - result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2") + result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2") assert result is not None - assert result.embedding_model == original_vector_db.embedding_model # Original values preserved + assert result.embedding_model == original_vector_store.embedding_model # Original values preserved async def test_get_all_objects(cached_disk_dist_registry): # Create multiple test banks # Create multiple test banks - test_vector_dbs = [ - VectorDB( - identifier=f"test_vector_db_{i}", + test_vector_stores = [ + VectorStore( + identifier=f"test_vector_store_{i}", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id=f"test_vector_db_{i}", + provider_resource_id=f"test_vector_store_{i}", provider_id=f"provider_{i}", ) for i in range(3) ] - # Register all vector_dbs - for vector_db in test_vector_dbs: - await cached_disk_dist_registry.register(vector_db) + # Register all vector_stores + for vector_store in test_vector_stores: + await cached_disk_dist_registry.register(vector_store) # Test get_all retrieval all_results = await cached_disk_dist_registry.get_all() assert len(all_results) == 3 - # Verify each vector_db was stored correctly - for original_vector_db in test_vector_dbs: - matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier] - assert len(matching_vector_dbs) == 1 - stored_vector_db = matching_vector_dbs[0] - assert stored_vector_db.embedding_model == original_vector_db.embedding_model - assert stored_vector_db.provider_id == original_vector_db.provider_id - assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension + # Verify each vector_store was stored correctly + for original_vector_store in test_vector_stores: + matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier] + assert len(matching_vector_stores) == 1 + stored_vector_store = matching_vector_stores[0] + assert stored_vector_store.embedding_model == original_vector_store.embedding_model + assert stored_vector_store.provider_id == original_vector_store.provider_id + assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension async def test_parse_registry_values_error_handling(sqlite_kvstore): - valid_db = VectorDB( - identifier="valid_vector_db", + valid_db = VectorStore( + identifier="valid_vector_store", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, - provider_resource_id="valid_vector_db", + provider_resource_id="valid_vector_store", provider_id="test-provider", ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json() + KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json() ) - await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json") + await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json") await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="missing_fields"), - '{"type": "vector_db", "identifier": "missing_fields"}', + KEY_FORMAT.format(type="vector_store", identifier="missing_fields"), + '{"type": "vector_store", "identifier": "missing_fields"}', ) test_registry = DiskDistributionRegistry(sqlite_kvstore) @@ -202,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore): # Should have filtered out the invalid entries assert len(all_objects) == 1 - assert all_objects[0].identifier == "valid_vector_db" + assert all_objects[0].identifier == "valid_vector_store" # Check that the get method also handles errors correctly - invalid_obj = await test_registry.get("vector_db", "corrupted_json") + invalid_obj = await test_registry.get("vector_store", "corrupted_json") assert invalid_obj is None - invalid_obj = await test_registry.get("vector_db", "missing_fields") + invalid_obj = await test_registry.get("vector_store", "missing_fields") assert invalid_obj is None async def test_cached_registry_error_handling(sqlite_kvstore): - valid_db = VectorDB( + valid_db = VectorStore( identifier="valid_cached_db", embedding_model="nomic-embed-text-v1.5", embedding_dimension=768, @@ -222,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore): ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json() + KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json() ) await sqlite_kvstore.set( - KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"), - '{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string + KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"), + '{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string ) cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore) @@ -237,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore): assert len(all_objects) == 1 assert all_objects[0].identifier == "valid_cached_db" - invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db") + invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db") assert invalid_obj is None async def test_double_registration_identical_objects(disk_dist_registry): """Test that registering identical objects succeeds (idempotent).""" - vector_db = VectorDBWithOwner( - identifier="test_vector_db", + vector_store = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db) + result1 = await disk_dist_registry.register(vector_store) assert result1 is True # Second registration of identical object should also succeed (idempotent) - result2 = await disk_dist_registry.register(vector_db) + result2 = await disk_dist_registry.register(vector_store) assert result2 is True # Verify object exists and is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None - assert retrieved.identifier == vector_db.identifier - assert retrieved.embedding_model == vector_db.embedding_model + assert retrieved.identifier == vector_store.identifier + assert retrieved.embedding_model == vector_store.embedding_model async def test_double_registration_different_objects(disk_dist_registry): """Test that registering different objects with same identifier fails.""" - vector_db1 = VectorDBWithOwner( - identifier="test_vector_db", + vector_store1 = VectorStoreWithOwner( + identifier="test_vector_store", embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) - vector_db2 = VectorDBWithOwner( - identifier="test_vector_db", # Same identifier + vector_store2 = VectorStoreWithOwner( + identifier="test_vector_store", # Same identifier embedding_model="different-model", # Different embedding model embedding_dimension=384, - provider_resource_id="test_vector_db", + provider_resource_id="test_vector_store", provider_id="test-provider", ) # First registration should succeed - result1 = await disk_dist_registry.register(vector_db1) + result1 = await disk_dist_registry.register(vector_store1) assert result1 is True # Second registration with different data should fail - with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"): - await disk_dist_registry.register(vector_db2) + with pytest.raises( + ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists" + ): + await disk_dist_registry.register(vector_store2) # Verify original object is unchanged - retrieved = await disk_dist_registry.get("vector_db", "test_vector_db") + retrieved = await disk_dist_registry.get("vector_store", "test_vector_store") assert retrieved is not None assert retrieved.embedding_model == "all-MiniLM-L6-v2" # Original value diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index f21bbdd67..d6d4f4f23 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -41,7 +41,7 @@ class TestTranslateException: self.identifier = identifier self.owner = owner - resource = MockResource("vector_db", "test-db") + resource = MockResource("vector_store", "test-db") exc = AccessDeniedError("create", resource, user) result = translate_exception(exc) @@ -49,7 +49,7 @@ class TestTranslateException: assert isinstance(result, HTTPException) assert result.status_code == 403 assert "test-user" in result.detail - assert "vector_db::test-db" in result.detail + assert "vector_store::test-db" in result.detail assert "create" in result.detail assert "roles=['user']" in result.detail assert "teams=['dev']" in result.detail