chore(cleanup)!: kill vector_db references as far as possible (#3864)

There should not be "vector db" anywhere.
This commit is contained in:
Ashwin Bharambe 2025-10-20 20:06:16 -07:00 committed by GitHub
parent 444f6c88f3
commit 122de785c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
46 changed files with 701 additions and 822 deletions

View file

@ -6440,7 +6440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9132,7 +9132,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9440,7 +9440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -10203,7 +10203,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -11325,7 +11325,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -12652,7 +12652,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -32,7 +32,6 @@ Commands:
scoring_functions Manage scoring functions.
shields Manage safety shield services.
toolgroups Manage available tool groups.
vector_dbs Manage vector databases.
```
### `llama-stack-client configure`
@ -211,53 +210,6 @@ Unregister a model from distribution endpoint
llama-stack-client models unregister <model_id>
```
## Vector DB Management
Manage vector databases.
### `llama-stack-client vector_dbs list`
Show available vector dbs on distribution endpoint
```bash
llama-stack-client vector_dbs list
```
```
┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ provider_resource_id ┃ vector_db_type ┃ params ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ my_demo_vector_db │ faiss │ my_demo_vector_db │ │ embedding_dimension: 768 │
│ │ │ │ │ embedding_model: nomic-embed-text-v1.5 │
│ │ │ │ │ type: vector_db │
│ │ │ │ │ │
└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘
```
### `llama-stack-client vector_dbs register`
Create a new vector db
```bash
llama-stack-client vector_dbs register <vector-db-id> [--provider-id <provider-id>] [--provider-vector-db-id <provider-vector-db-id>] [--embedding-model <embedding-model>] [--embedding-dimension <embedding-dimension>]
```
Required arguments:
- `VECTOR_DB_ID`: Vector DB ID
Optional arguments:
- `--provider-id`: Provider ID for the vector db
- `--provider-vector-db-id`: Provider's vector db ID
- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5`
- `--embedding-dimension`: Dimension of embeddings. Default: 768
### `llama-stack-client vector_dbs unregister`
Delete a vector db
```bash
llama-stack-client vector_dbs unregister <vector-db-id>
```
Required arguments:
- `VECTOR_DB_ID`: Vector DB ID
## Shield Management
Manage safety shield services.
### `llama-stack-client shields list`

View file

@ -5547,7 +5547,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -5798,7 +5798,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -4114,7 +4114,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -4303,7 +4303,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -1850,7 +1850,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -3983,7 +3983,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -1320,7 +1320,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -2927,7 +2927,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -6800,7 +6800,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -10205,7 +10205,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -10687,7 +10687,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -11740,7 +11740,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -5227,7 +5227,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -7919,7 +7919,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -8227,7 +8227,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -8990,7 +8990,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -8472,7 +8472,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -11877,7 +11877,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -12359,7 +12359,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -13412,7 +13412,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -14959,7 +14959,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",
@ -16704,7 +16704,7 @@
"enum": [
"model",
"shield",
"vector_db",
"vector_store",
"dataset",
"scoring_function",
"benchmark",

View file

@ -6440,7 +6440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9132,7 +9132,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -9440,7 +9440,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -10203,7 +10203,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -11325,7 +11325,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark
@ -12652,7 +12652,7 @@ components:
enum:
- model
- shield
- vector_db
- vector_store
- dataset
- scoring_function
- benchmark

View file

@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
models = "models"
shields = "shields"
vector_dbs = "vector_dbs" # only used for routing
vector_stores = "vector_stores" # only used for routing table
datasets = "datasets"
scoring_functions = "scoring_functions"
benchmarks = "benchmarks"

View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
class ResourceType(StrEnum):
model = "model"
shield = "shield"
vector_db = "vector_db"
vector_store = "vector_store"
dataset = "dataset"
scoring_function = "scoring_function"
benchmark = "benchmark"
@ -34,4 +34,4 @@ class Resource(BaseModel):
provider_id: str = Field(description="ID of the provider that owns this resource")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")

View file

@ -1,93 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class VectorDB(Resource):
"""Vector database resource for storing and querying vector embeddings.
:param type: Type of resource, always 'vector_db' for vector databases
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
"""
type: Literal[ResourceType.vector_db] = ResourceType.vector_db
embedding_model: str
embedding_dimension: int
vector_db_name: str | None = None
@property
def vector_db_id(self) -> str:
return self.identifier
@property
def provider_vector_db_id(self) -> str | None:
return self.provider_resource_id
class VectorDBInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_db_id: Unique identifier for the vector database
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
"""
vector_db_id: str
embedding_model: str
embedding_dimension: int
provider_id: str | None = None
provider_vector_db_id: str | None = None
class ListVectorDBsResponse(BaseModel):
"""Response from listing vector databases.
:param data: List of vector databases
"""
data: list[VectorDB]
@runtime_checkable
class VectorDBs(Protocol):
"""Internal protocol for vector_dbs routing - no public API endpoints."""
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""Internal method to list vector databases."""
...
async def get_vector_db(
self,
vector_db_id: str,
) -> VectorDB:
"""Internal method to get a vector database by ID."""
...
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> VectorDB:
"""Internal method to register a vector database."""
...
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Internal method to unregister a vector database."""
...

View file

@ -15,7 +15,7 @@ from fastapi import Body
from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
total: int
# TODO: rename this as OpenAIVectorStore
@json_schema_type
class VectorStoreObject(BaseModel):
"""OpenAI Vector Store object.
@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all
chunking_strategy: VectorStoreChunkingStrategy | None = None
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
class VectorStoreTable(Protocol):
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
@runtime_checkable
@trace_protocol
class VectorIO(Protocol):
vector_db_store: VectorDBStore | None = None
vector_store_table: VectorStoreTable | None = None
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks(
self,
@ -546,6 +548,7 @@ class VectorIO(Protocol):
"""
...
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks(
self,

View file

@ -4,4 +4,4 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .vector_dbs import *
from .vector_stores import *

View file

@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Literal
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
# Internal resource type for storing the vector store routing and other information
class VectorStore(Resource):
"""Vector database resource for storing and querying vector embeddings.
:param type: Type of resource, always 'vector_store' for vector stores
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
"""
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
embedding_model: str
embedding_dimension: int
vector_store_name: str | None = None
@property
def vector_store_id(self) -> str:
return self.identifier
@property
def provider_vector_store_id(self) -> str | None:
return self.provider_resource_id
class VectorStoreInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_store_id: Unique identifier for the vector store
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
"""
vector_store_id: str
embedding_model: str
embedding_dimension: int
provider_id: str | None = None
provider_vector_store_id: str | None = None

View file

@ -41,7 +41,7 @@ class AccessRule(BaseModel):
A rule defines a list of action either to permit or to forbid. It may specify a
principal or a resource that must match for the rule to take effect. The resource
to match should be specified in the form of a type qualified identifier, e.g.
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
e.g. model::*. If the principal or resource are not specified, they will match all
requests.
@ -79,9 +79,9 @@ class AccessRule(BaseModel):
description: any user has read access to any resource created by a member of their team
- forbid:
actions: [create, read, delete]
resource: vector_db::*
resource: vector_store::*
unless: user with admin in roles
description: only user with admin role can use vector_db resources
description: only user with admin role can use vector_store resources
"""

View file

@ -23,8 +23,8 @@ from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
pass
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
pass
@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
pass
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObjectWithProvider = Annotated[
ModelWithOwner
| ShieldWithOwner
| VectorDBWithOwner
| VectorStoreWithOwner
| DatasetWithOwner
| ScoringFnWithOwner
| BenchmarkWithOwner
@ -427,7 +427,7 @@ class RegisteredResources(BaseModel):
models: list[ModelInput] = Field(default_factory=list)
shields: list[ShieldInput] = Field(default_factory=list)
vector_dbs: list[VectorDBInput] = Field(default_factory=list)
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
datasets: list[DatasetInput] = Field(default_factory=list)
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
benchmarks: list[BenchmarkInput] = Field(default_factory=list)

View file

@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
router_api=Api.tool_runtime,
),
AutoRoutedApiInfo(
routing_table_api=Api.vector_dbs,
routing_table_api=Api.vector_stores,
router_api=Api.vector_io,
),
]

View file

@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDBs
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.inspect: Inspect,
Api.batches: Batches,
Api.vector_io: VectorIO,
Api.vector_dbs: VectorDBs,
Api.vector_stores: VectorStore,
Api.models: Models,
Api.safety: Safety,
Api.shields: Shields,

View file

@ -29,7 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_dbs import VectorDBsRoutingTable
from ..routing_tables.vector_stores import VectorStoresRoutingTable
api_to_tables = {
"models": ModelsRoutingTable,
@ -38,7 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable,
"vector_dbs": VectorDBsRoutingTable,
"vector_stores": VectorStoresRoutingTable,
}
if api.value not in api_to_tables:

View file

@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime):
async def query(
self,
content: InterleavedContent,
vector_db_ids: list[str],
vector_store_ids: list[str],
query_config: RAGQueryConfig | None = None,
) -> RAGQueryResult:
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
provider = await self.routing_table.get_provider_impl("knowledge_search")
return await provider.query(content, vector_db_ids, query_config)
return await provider.query(content, vector_store_ids, query_config)
async def insert(
self,
documents: list[RAGDocument],
vector_db_id: str,
vector_store_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
logger.debug(
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
)
provider = await self.routing_table.get_provider_impl("insert_into_memory")
return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
def __init__(
self,

View file

@ -71,25 +71,6 @@ class VectorIORouter(VectorIO):
raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> None:
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
await self.routing_table.register_vector_db(
vector_db_id,
embedding_model,
embedding_dimension,
provider_id,
vector_db_name,
provider_vector_db_id,
)
async def insert_chunks(
self,
vector_db_id: str,
@ -165,22 +146,22 @@ class VectorIORouter(VectorIO):
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(
vector_db_id=vector_db_id,
vector_store_id = f"vs_{uuid.uuid4()}"
registered_vector_store = await self.routing_table.register_vector_store(
vector_store_id=vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
provider_id=provider_id,
provider_vector_db_id=vector_db_id,
vector_db_name=params.name,
provider_vector_store_id=vector_store_id,
vector_store_name=params.name,
)
provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
# Update model_extra with registered values so provider uses the already-registered vector_db
# Update model_extra with registered values so provider uses the already-registered vector_store
if params.model_extra is None:
params.model_extra = {}
params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
params.model_extra["provider_id"] = registered_vector_db.provider_id
params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
params.model_extra["provider_id"] = registered_vector_store.provider_id
if embedding_model is not None:
params.model_extra["embedding_model"] = embedding_model
if embedding_dimension is not None:
@ -198,15 +179,15 @@ class VectorIORouter(VectorIO):
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
# Route to default provider for now - could aggregate from all providers in the future
# call retrieve on each vector dbs to get list of vector stores
vector_dbs = await self.routing_table.get_all_with_type("vector_db")
vector_stores = await self.routing_table.get_all_with_type("vector_store")
all_stores = []
for vector_db in vector_dbs:
for vector_store in vector_stores:
try:
provider = await self.routing_table.get_provider_impl(vector_db.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
provider = await self.routing_table.get_provider_impl(vector_store.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
all_stores.append(vector_store)
except Exception as e:
logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
continue
# Sort by created_at

View file

@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
elif api == Api.safety:
return await p.register_shield(obj)
elif api == Api.vector_io:
return await p.register_vector_db(obj)
return await p.register_vector_store(obj)
elif api == Api.datasetio:
return await p.register_dataset(obj)
elif api == Api.scoring:
@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
api = get_impl_api(p)
if api == Api.vector_io:
return await p.unregister_vector_db(obj.identifier)
return await p.unregister_vector_store(obj.identifier)
elif api == Api.inference:
return await p.unregister_model(obj.identifier)
elif api == Api.safety:
@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
elif api == Api.safety:
p.shield_store = self
elif api == Api.vector_io:
p.vector_db_store = self
p.vector_store_store = self
elif api == Api.datasetio:
p.dataset_store = self
elif api == Api.scoring:
@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable
from .vector_dbs import VectorDBsRoutingTable
from .vector_stores import VectorStoresRoutingTable
def apiname_object():
if isinstance(self, ModelsRoutingTable):
return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield")
elif isinstance(self, VectorDBsRoutingTable):
return ("VectorIO", "vector_db")
elif isinstance(self, VectorStoresRoutingTable):
return ("VectorIO", "vector_store")
elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable):

View file

@ -6,15 +6,12 @@
from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
# Removed VectorDBs import to avoid exposing public API
# Removed VectorStores import to avoid exposing public API
from llama_stack.apis.vector_io.vector_io import (
OpenAICreateVectorStoreRequestWithExtraBody,
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorDBWithOwner,
VectorStoreWithOwner,
)
from llama_stack.log import get_logger
@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model
logger = get_logger(name=__name__, category="core::routing_tables")
class VectorDBsRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_db operations.
class VectorStoresRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_store operations.
Does not inherit from VectorDBs to avoid exposing public API endpoints.
Does not inherit from VectorStores to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""
# Internal methods only - no public API exposure
async def register_vector_db(
async def register_vector_store(
self,
vector_db_id: str,
vector_store_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_db_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_store_id: str | None = None,
vector_store_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
try:
provider = self.impls_by_provider_id[provider_id]
except KeyError:
available_providers = list(self.impls_by_provider_id.keys())
raise ValueError(
f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
) from None
logger.warning(
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
)
request = OpenAICreateVectorStoreRequestWithExtraBody(
name=vector_db_name or vector_db_id,
embedding_model=embedding_model,
embedding_dimension=model.metadata["embedding_dimension"],
vector_store = VectorStoreWithOwner(
identifier=vector_store_id,
type=ResourceType.vector_store.value,
provider_id=provider_id,
provider_vector_db_id=provider_vector_db_id,
provider_resource_id=provider_vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
vector_store_name=vector_store_name,
)
vector_store = await provider.openai_create_vector_store(request)
vector_store_id = vector_store.id
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
logger.warning(
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
)
vector_db_data = {
"identifier": vector_store_id,
"type": ResourceType.vector_db.value,
"provider_id": provider_id,
"provider_resource_id": actual_provider_vector_db_id,
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
"vector_db_name": vector_store.name,
}
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
return vector_db
await self.register_object(vector_store)
return vector_store
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)
@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_db(vector_store_id)
await self.unregister_vector_store(vector_store_id)
return result
async def unregister_vector_db(self, vector_store_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
if vector_db_obj:
await self.unregister_object(vector_db_obj)
vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
if vector_store_obj:
await self.unregister_object(vector_store_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_db", vector_store_id)
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_db", vector_store_id)
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
@ -315,7 +284,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_db", vector_store_id)
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,

View file

@ -32,7 +32,7 @@ def tool_chat_page():
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
selected_vector_dbs = []
selected_vector_stores = []
def reset_agent():
st.session_state.clear()
@ -55,13 +55,13 @@ def tool_chat_page():
)
if "builtin::rag" in toolgroup_selection:
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
if not vector_dbs:
vector_stores = llama_stack_api.client.vector_stores.list() or []
if not vector_stores:
st.info("No vector databases available for selection.")
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
selected_vector_dbs = st.multiselect(
vector_stores = [vector_store.identifier for vector_store in vector_stores]
selected_vector_stores = st.multiselect(
label="Select Document Collections to use in RAG queries",
options=vector_dbs,
options=vector_stores,
on_change=reset_agent,
)
@ -119,7 +119,7 @@ def tool_chat_page():
tool_dict = dict(
name="builtin::rag",
args={
"vector_db_ids": list(selected_vector_dbs),
"vector_store_ids": list(selected_vector_stores),
},
)
toolgroup_selection[i] = tool_dict

View file

@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.shields import Shield
from llama_stack.apis.tools import ToolGroup
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.schema_utils import json_schema_type
@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
async def unregister_shield(self, identifier: str) -> None: ...
class VectorDBsProtocolPrivate(Protocol):
async def register_vector_db(self, vector_db: VectorDB) -> None: ...
class VectorStoresProtocolPrivate(Protocol):
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
async def unregister_vector_db(self, vector_db_id: str) -> None: ...
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
class DatasetsProtocolPrivate(Protocol):

View file

@ -17,21 +17,21 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import FaissVectorIOConfig
logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex):
)
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.cache: dict[str, VectorStoreWithIndex] = {}
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
# Load existing banks from kvstore
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
except Exception as e:
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
# Store in cache
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db=vector_db,
index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store=vector_store,
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
inference_api=self.inference_api,
)
async def list_vector_dbs(self) -> list[VectorDB]:
return [i.vector_db for i in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [i.vector_store for i in self.cache.values()]
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
assert self.kvstore is not None
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = self.cache.get(vector_db_id)

View file

@ -17,10 +17,10 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid"
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex):
await asyncio.to_thread(_delete_chunks)
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
"""
A VectorIO implementation using SQLite + sqlite_vec.
This class handles vector database registration (with metadata stored in a table named `vector_dbs`)
and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
This class handles vector database registration (with metadata stored in a table named `vector_stores`)
and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
"""
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.vector_db_store = None
self.cache: dict[str, VectorStoreWithIndex] = {}
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(db_json)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_stores:
vector_store = VectorStore.model_validate_json(db_json)
index = await SQLiteVecIndex.create(
vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def list_vector_dbs(self) -> list[VectorDB]:
return [v.vector_db for v in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [v.vector_store for v in self.cache.values()]
async def register_vector_db(self, vector_db: VectorDB) -> None:
index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
async def register_vector_store(self, vector_store: VectorStore) -> None:
index = await SQLiteVecIndex.create(
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index = VectorStoreWithIndex(
vector_store=vector_store,
index=SQLiteVecIndex(
dimension=vector_db.embedding_dimension,
dimension=vector_store.embedding_dimension,
db_path=self.config.db_path,
bank_id=vector_db.identifier,
bank_id=vector_store.identifier,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
# The VectorDBWithIndex helper is expected to compute embeddings via the inference_api
# The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
# and then call our index's add_chunks.
await index.insert_chunks(chunks)
async def query_chunks(
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a sqlite_vec index."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)

View file

@ -13,15 +13,15 @@ from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex):
raise NotImplementedError("Hybrid search is not supported in Chroma")
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.inference_api = inference_api
self.client = None
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
self.vector_db_store = self.kvstore
self.vector_store_table = self.kvstore
if isinstance(self.config, RemoteChromaVectorIOConfig):
log.info(f"Connecting to Chroma server at: {self.config.url}")
@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
collection = await maybe_await(
self.client.get_or_create_collection(
name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()}
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
)
)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db, ChromaIndex(self.client, collection), self.inference_api
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store, ChromaIndex(self.client, collection), self.inference_api
)
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
log.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
log.warning(f"Vector DB {vector_store_id} not found")
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
return await index.query_chunks(query, params)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_db_id))
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_store_id))
if not collection:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_db_id] = index
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_store_id] = index
return index
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Chroma vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")

View file

@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_WEIGHTED,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = get_logger(name=__name__, category="vector_io::milvus")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::"
@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex):
raise
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache = {}
self.client = None
self.inference_api = inference_api
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
index=MilvusIndex(
client=self.client,
collection_name=vector_db.identifier,
collection_name=vector_store.identifier,
consistency_level=self.config.consistency_level,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
if isinstance(self.config, RemoteMilvusVectorIOConfig):
logger.info(f"Connecting to Milvus server at {self.config.uri}")
self.client = MilvusClient(**self.config.model_dump(exclude_none=True))
@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
if isinstance(self.config, RemoteMilvusVectorIOConfig):
consistency_level = self.config.consistency_level
else:
consistency_level = "Strong"
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a milvus vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)

View file

@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name
from .config import PGVectorVectorIOConfig
@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig
log = get_logger(name=__name__, category="vector_io::pgvector")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::"
@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex):
def __init__(
self,
vector_db: VectorDB,
vector_store: VectorStore,
dimension: int,
conn: psycopg2.extensions.connection,
kvstore: KVStore | None = None,
distance_metric: str = "COSINE",
):
self.vector_db = vector_db
self.vector_store = vector_store
self.dimension = dimension
self.conn = conn
self.kvstore = kvstore
@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex):
try:
with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
# Sanitize the table name by replacing hyphens with underscores
# SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
# SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens
# when created with patterns like "test-vector-db-{uuid4()}"
sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
sanitized_identifier = sanitize_collection_name(self.vector_store.identifier)
self.table_name = f"vs_{sanitized_identifier}"
cur.execute(
@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex):
"""
)
except Exception as e:
log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e
async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
assert len(chunks) == len(embeddings), (
@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex):
)
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
) -> None:
@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
self.inference_api = inference_api
self.conn = None
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:
@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
# Persist vector DB metadata in the KV store
assert self.kvstore is not None
# Upsert model metadata in Postgres
upsert_models(self.conn, [(vector_db.identifier, vector_db)])
upsert_models(self.conn, [(vector_store.identifier, vector_store)])
# Create and cache the PGVector index table for the vector DB
pgvector_index = PGVectorIndex(
vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore
)
await pgvector_index.initialize()
index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api)
self.cache[vector_db.identifier] = index
index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
self.cache[vector_store.identifier] = index
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
# Remove provider index and cache
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
# Delete vector DB metadata from KV store
assert self.kvstore is not None
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}")
await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
await index.insert_chunks(chunks)
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
return await index.query_chunks(query, params)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
await index.initialize()
self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
return self.cache[vector_db_id]
self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
return self.cache[vector_store_id]
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a PostgreSQL vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)

View file

@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreFileObject,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id"
# KV store prefixes for vector databases
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::"
def convert_id(_id: str) -> str:
@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex):
await self.client.delete_collection(collection_name=self.collection_name)
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig,
@ -157,7 +157,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.client: AsyncQdrantClient = None
self.cache = {}
self.inference_api = inference_api
self.vector_db_store = None
self.vector_store_table = None
self._qdrant_lock = asyncio.Lock()
async def initialize(self) -> None:
@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api)
self.cache[vector_db.identifier] = index
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api
)
self.cache[vector_store.identifier] = index
self.openai_vector_stores = await self._load_openai_vector_stores()
async def shutdown(self) -> None:
@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
await self.kvstore.set(key=key, value=vector_db.model_dump_json())
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
index = VectorDBWithIndex(
vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api
)
self.cache[vector_db.identifier] = index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
assert self.kvstore is not None
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
if self.vector_db_store is None:
raise ValueError(f"Vector DB not found {vector_db_id}")
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index=QdrantIndex(client=self.client, collection_name=vector_db.identifier),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=QdrantIndex(self.client, vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store.identifier] = index
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
assert self.kvstore is not None
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_store_table is None:
raise ValueError(f"Vector DB not found {vector_store_id}")
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorStoreWithIndex(
vector_store=vector_store,
index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_store_id] = index
return index
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Qdrant vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")

View file

@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig
log = get_logger(name=__name__, category="vector_io::weaviate")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::"
@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores)
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate):
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.client_cache = {}
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
def _get_client(self) -> weaviate.WeaviateClient:
@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored = await self.kvstore.values_in_range(start_key, end_key)
for raw in stored:
vector_db = VectorDB.model_validate_json(raw)
vector_store = VectorStore.model_validate_json(raw)
client = self._get_client()
idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db=vector_db, index=idx, inference_api=self.inference_api
idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore)
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store=vector_store, index=idx, inference_api=self.inference_api
)
# Load OpenAI vector stores metadata into cache
@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
# Create collection if it doesn't exist
if not client.collections.exists(sanitized_collection_name):
client.collections.create(
@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
],
)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
)
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True)
if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True)
if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
return
client.collections.delete(sanitized_collection_name)
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
client = self._get_client()
sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
if not client.collections.exists(sanitized_collection_name):
raise ValueError(f"Collection with name `{sanitized_collection_name}` not found")
index = VectorDBWithIndex(
vector_db=vector_db,
index=WeaviateIndex(client=client, collection_name=vector_db.identifier),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=WeaviateIndex(client=client, collection_name=vector_store.identifier),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
async def query_chunks(
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")

View file

@ -17,7 +17,6 @@ from pydantic import TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files, OpenAIFileObject
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import (
VectorStoreSearchResponse,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.id_generation import generate_object_id
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3 # Maximum concurrent file processing within
FILE_BATCH_CHUNK_SIZE = 10 # Process files in chunks of this size
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC):
pass
@abstractmethod
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
"""Register a vector database (provider-specific implementation)."""
pass
@abstractmethod
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Unregister a vector database (provider-specific implementation)."""
pass
@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC):
extra_body = params.model_extra or {}
metadata = params.metadata or {}
provider_vector_db_id = extra_body.get("provider_vector_db_id")
provider_vector_store_id = extra_body.get("provider_vector_store_id")
# Use embedding info from metadata if available, otherwise from extra_body
if metadata.get("embedding_model"):
@ -389,8 +389,8 @@ class OpenAIVectorStoreMixin(ABC):
# use provider_id set by router; fallback to provider's own ID when used directly via --stack-config
provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None)
# Derive the canonical vector_db_id (allow override, else generate)
vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
# Derive the canonical vector_store_id (allow override, else generate)
vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
if embedding_model is None:
raise ValueError("embedding_model is required")
@ -398,19 +398,20 @@ class OpenAIVectorStoreMixin(ABC):
if embedding_dimension is None:
raise ValueError("Embedding dimension is required")
# Register the VectorDB backing this vector store
# Register the VectorStore backing this vector store
if provider_id is None:
raise ValueError("Provider ID is required but was not provided")
vector_db = VectorDB(
identifier=vector_db_id,
# call to the provider to create any index, etc.
vector_store = VectorStore(
identifier=vector_store_id,
embedding_dimension=embedding_dimension,
embedding_model=embedding_model,
provider_id=provider_id,
provider_resource_id=vector_db_id,
vector_db_name=params.name,
provider_resource_id=vector_store_id,
vector_store_name=params.name,
)
await self.register_vector_db(vector_db)
await self.register_vector_store(vector_store)
# Create OpenAI vector store metadata
status = "completed"
@ -424,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC):
total=0,
)
store_info: dict[str, Any] = {
"id": vector_db_id,
"id": vector_store_id,
"object": "vector_store",
"created_at": created_at,
"name": params.name,
@ -441,23 +442,23 @@ class OpenAIVectorStoreMixin(ABC):
# Add provider information to metadata if provided
if provider_id:
metadata["provider_id"] = provider_id
if provider_vector_db_id:
metadata["provider_vector_db_id"] = provider_vector_db_id
if provider_vector_store_id:
metadata["provider_vector_store_id"] = provider_vector_store_id
store_info["metadata"] = metadata
# Save to persistent storage (provider-specific)
await self._save_openai_vector_store(vector_db_id, store_info)
await self._save_openai_vector_store(vector_store_id, store_info)
# Store in memory cache
self.openai_vector_stores[vector_db_id] = store_info
self.openai_vector_stores[vector_store_id] = store_info
# Now that our vector store is created, attach any files that were provided
file_ids = params.file_ids or []
tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids]
tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
await asyncio.gather(*tasks)
# Get the updated store info and return it
store_info = self.openai_vector_stores[vector_db_id]
store_info = self.openai_vector_stores[vector_store_id]
return VectorStoreObject.model_validate(store_info)
async def openai_list_vector_stores(
@ -567,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC):
# Also delete the underlying vector DB
try:
await self.unregister_vector_db(vector_store_id)
await self.unregister_vector_store(vector_store_id)
except Exception as e:
logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}")

View file

@ -23,8 +23,8 @@ from llama_stack.apis.common.content_types import (
)
from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody
from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import Api
@ -187,7 +187,7 @@ def make_overlapped_chunks(
updated_timestamp=int(time.time()),
chunk_window=chunk_window,
chunk_tokenizer=default_tokenizer,
chunk_embedding_model=None, # This will be set in `VectorDBWithIndex.insert_chunks`
chunk_embedding_model=None, # This will be set in `VectorStoreWithIndex.insert_chunks`
content_token_count=len(toks),
metadata_token_count=len(metadata_tokens),
)
@ -255,8 +255,8 @@ class EmbeddingIndex(ABC):
@dataclass
class VectorDBWithIndex:
vector_db: VectorDB
class VectorStoreWithIndex:
vector_store: VectorStore
index: EmbeddingIndex
inference_api: Api.inference
@ -269,14 +269,14 @@ class VectorDBWithIndex:
if c.embedding is None:
chunks_to_embed.append(c)
if c.chunk_metadata:
c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model
c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension
c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model
c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension
else:
_validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
_validate_embedding(c.embedding, i, self.vector_store.embedding_dimension)
if chunks_to_embed:
params = OpenAIEmbeddingsRequestWithExtraBody(
model=self.vector_db.embedding_model,
model=self.vector_store.embedding_model,
input=[c.content for c in chunks_to_embed],
)
resp = await self.inference_api.openai_embeddings(params)
@ -319,7 +319,7 @@ class VectorDBWithIndex:
return await self.index.query_keyword(query_string, k, score_threshold)
params = OpenAIEmbeddingsRequestWithExtraBody(
model=self.vector_db.embedding_model,
model=self.vector_store.embedding_model,
input=[query_string],
)
embeddings_response = await self.inference_api.openai_embeddings(params)

View file

@ -37,6 +37,9 @@ def pytest_sessionstart(session):
if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
if "LLAMA_STACK_LOGGING" not in os.environ:
os.environ["LLAMA_STACK_LOGGING"] = "all=warning"
if "SQLITE_STORE_DIR" not in os.environ:
os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp()

View file

@ -49,46 +49,50 @@ def client_with_empty_registry(client_with_models):
@vector_provider_wrapper
def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
vector_db_name = "test_vector_db"
def test_vector_store_retrieve(
client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
):
vector_store_name = "test_vector_store"
create_response = client_with_empty_registry.vector_stores.create(
name=vector_db_name,
name=vector_store_name,
extra_body={
"provider_id": vector_io_provider_id,
},
)
actual_vector_db_id = create_response.id
actual_vector_store_id = create_response.id
# Retrieve the vector store and validate its properties
response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id)
response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id)
assert response is not None
assert response.id == actual_vector_db_id
assert response.name == vector_db_name
assert response.id == actual_vector_store_id
assert response.name == vector_store_name
assert response.id.startswith("vs_")
@vector_provider_wrapper
def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
vector_db_name = "test_vector_db"
def test_vector_store_register(
client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
):
vector_store_name = "test_vector_store"
response = client_with_empty_registry.vector_stores.create(
name=vector_db_name,
name=vector_store_name,
extra_body={
"provider_id": vector_io_provider_id,
},
)
actual_vector_db_id = response.id
assert actual_vector_db_id.startswith("vs_")
assert actual_vector_db_id != vector_db_name
actual_vector_store_id = response.id
assert actual_vector_store_id.startswith("vs_")
assert actual_vector_store_id != vector_store_name
vector_stores = client_with_empty_registry.vector_stores.list()
assert len(vector_stores.data) == 1
vector_store = vector_stores.data[0]
assert vector_store.id == actual_vector_db_id
assert vector_store.name == vector_db_name
assert vector_store.id == actual_vector_store_id
assert vector_store.name == vector_store_name
client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id)
client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id)
vector_stores = client_with_empty_registry.vector_stores.list()
assert len(vector_stores.data) == 0
@ -108,23 +112,23 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
def test_insert_chunks(
client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id
):
vector_db_name = "test_vector_db"
vector_store_name = "test_vector_store"
create_response = client_with_empty_registry.vector_stores.create(
name=vector_db_name,
name=vector_store_name,
extra_body={
"provider_id": vector_io_provider_id,
},
)
actual_vector_db_id = create_response.id
actual_vector_store_id = create_response.id
client_with_empty_registry.vector_io.insert(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
chunks=sample_chunks,
)
response = client_with_empty_registry.vector_io.query(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
query="What is the capital of France?",
)
assert response is not None
@ -133,7 +137,7 @@ def test_insert_chunks(
query, expected_doc_id = test_case
response = client_with_empty_registry.vector_io.query(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
query=query,
)
assert response is not None
@ -151,15 +155,15 @@ def test_insert_chunks_with_precomputed_embeddings(
"inline::qdrant": {"score_threshold": -1.0},
"remote::qdrant": {"score_threshold": -1.0},
}
vector_db_name = "test_precomputed_embeddings_db"
vector_store_name = "test_precomputed_embeddings_db"
register_response = client_with_empty_registry.vector_stores.create(
name=vector_db_name,
name=vector_store_name,
extra_body={
"provider_id": vector_io_provider_id,
},
)
actual_vector_db_id = register_response.id
actual_vector_store_id = register_response.id
chunks_with_embeddings = [
Chunk(
@ -170,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings(
]
client_with_empty_registry.vector_io.insert(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
chunks=chunks_with_embeddings,
)
provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
response = client_with_empty_registry.vector_io.query(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
query="precomputed embedding test",
params=vector_io_provider_params_dict.get(provider, None),
)
@ -200,16 +204,16 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
"remote::qdrant": {"score_threshold": 0.0},
"inline::qdrant": {"score_threshold": 0.0},
}
vector_db_name = "test_precomputed_embeddings_db"
vector_store_name = "test_precomputed_embeddings_db"
register_response = client_with_empty_registry.vector_stores.create(
name=vector_db_name,
name=vector_store_name,
extra_body={
"embedding_model": embedding_model_id,
"provider_id": vector_io_provider_id,
},
)
actual_vector_db_id = register_response.id
actual_vector_store_id = register_response.id
chunks_with_embeddings = [
Chunk(
@ -220,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
]
client_with_empty_registry.vector_io.insert(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
chunks=chunks_with_embeddings,
)
provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
response = client_with_empty_registry.vector_io.query(
vector_db_id=actual_vector_db_id,
vector_db_id=actual_vector_store_id,
query="duplicate",
params=vector_io_provider_params_dict.get(provider, None),
)

View file

@ -21,7 +21,7 @@ async def test_single_provider_auto_selection():
Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
]
)
mock_routing_table.register_vector_db = AsyncMock(
mock_routing_table.register_vector_store = AsyncMock(
return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123")
)
mock_routing_table.get_provider_impl = AsyncMock(

View file

@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
import numpy as np
import pytest
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
@ -31,7 +31,7 @@ def vector_provider(request):
@pytest.fixture
def vector_db_id() -> str:
def vector_store_id() -> str:
return f"test-vector-db-{random.randint(1, 100)}"
@ -149,8 +149,8 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf
)
collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
await adapter.initialize()
await adapter.register_vector_db(
VectorDB(
await adapter.register_vector_store(
VectorStore(
identifier=collection_id,
provider_id="test_provider",
embedding_model="test_model",
@ -186,8 +186,8 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding
files_api=None,
)
await adapter.initialize()
await adapter.register_vector_db(
VectorDB(
await adapter.register_vector_store(
VectorStore(
identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
provider_id="test_provider",
embedding_model="test_model",
@ -215,7 +215,7 @@ def mock_psycopg2_connection():
async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
connection, cursor = mock_psycopg2_connection
vector_db = VectorDB(
vector_store = VectorStore(
identifier="test-vector-db",
embedding_model="test-model",
embedding_dimension=embedding_dimension,
@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"):
index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE")
index._test_chunks = []
original_add_chunks = index.add_chunks
@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd
await adapter.initialize()
adapter.conn = mock_conn
async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None):
index = await adapter._get_and_cache_vector_db_index(vector_db_id)
async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None):
index = await adapter._get_and_cache_vector_store_index(vector_store_id)
if not index:
raise ValueError(f"Vector DB {vector_db_id} not found")
raise ValueError(f"Vector DB {vector_store_id} not found")
await index.insert_chunks(chunks)
adapter.insert_chunks = mock_insert_chunks
async def mock_query_chunks(vector_db_id, query, params=None):
index = await adapter._get_and_cache_vector_db_index(vector_db_id)
async def mock_query_chunks(vector_store_id, query, params=None):
index = await adapter._get_and_cache_vector_store_index(vector_store_id)
if not index:
raise ValueError(f"Vector DB {vector_db_id} not found")
raise ValueError(f"Vector DB {vector_store_id} not found")
return await index.query_chunks(query, params)
adapter.query_chunks = mock_query_chunks
test_vector_db = VectorDB(
test_vector_store = VectorStore(
identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}",
provider_id="test_provider",
embedding_model="test_model",
embedding_dimension=embedding_dimension,
)
await adapter.register_vector_db(test_vector_db)
adapter.test_collection_id = test_vector_db.identifier
await adapter.register_vector_store(test_vector_store)
adapter.test_collection_id = test_vector_store.identifier
yield adapter
await adapter.shutdown()

View file

@ -11,8 +11,8 @@ import numpy as np
import pytest
from llama_stack.apis.files import Files
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import (
@ -43,8 +43,8 @@ def embedding_dimension():
@pytest.fixture
def vector_db_id():
return "test_vector_db"
def vector_store_id():
return "test_vector_store"
@pytest.fixture
@ -61,12 +61,12 @@ def sample_embeddings(embedding_dimension):
@pytest.fixture
def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
mock_vector_db = MagicMock(spec=VectorDB)
mock_vector_db.embedding_model = "mock_embedding_model"
mock_vector_db.identifier = vector_db_id
mock_vector_db.embedding_dimension = embedding_dimension
return mock_vector_db
def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock:
mock_vector_store = MagicMock(spec=VectorStore)
mock_vector_store.embedding_model = "mock_embedding_model"
mock_vector_store.identifier = vector_store_id
mock_vector_store.embedding_dimension = embedding_dimension
return mock_vector_store
@pytest.fixture

View file

@ -12,7 +12,6 @@ import numpy as np
import pytest
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategyAuto,
VectorStoreFileObject,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
# This test is a unit test for the inline VectorIO providers. This should only contain
@ -71,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
key = f"{VECTOR_DBS_PREFIX}db1"
dummy = VectorDB(
dummy = VectorStore(
identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
)
await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump()))
@ -81,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
async def test_persistence_across_adapter_restarts(vector_io_adapter):
await vector_io_adapter.initialize()
dummy = VectorDB(
dummy = VectorStore(
identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
)
await vector_io_adapter.register_vector_db(dummy)
await vector_io_adapter.register_vector_store(dummy)
await vector_io_adapter.shutdown()
await vector_io_adapter.initialize()
@ -92,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
await vector_io_adapter.shutdown()
async def test_register_and_unregister_vector_db(vector_io_adapter):
async def test_register_and_unregister_vector_store(vector_io_adapter):
unique_id = f"foo_db_{np.random.randint(1e6)}"
dummy = VectorDB(
dummy = VectorStore(
identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
)
await vector_io_adapter.register_vector_db(dummy)
await vector_io_adapter.register_vector_store(dummy)
assert dummy.identifier in vector_io_adapter.cache
await vector_io_adapter.unregister_vector_db(dummy.identifier)
await vector_io_adapter.unregister_vector_store(dummy.identifier)
assert dummy.identifier not in vector_io_adapter.cache
@ -121,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
async def test_insert_chunks_missing_db_raises(vector_io_adapter):
vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
with pytest.raises(ValueError):
await vector_io_adapter.insert_chunks("db_not_exist", [])
@ -170,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
async def test_query_chunks_missing_db_raises(vector_io_adapter):
vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
with pytest.raises(ValueError):
await vector_io_adapter.query_chunks("db_missing", "q", None)
@ -182,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter):
"id": store_id,
"name": "Test Store",
"description": "A test OpenAI vector store",
"vector_db_id": "test_db",
"vector_store_id": "test_db",
"embedding_model": "test_model",
}
@ -198,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter):
"id": store_id,
"name": "Test Store",
"description": "A test OpenAI vector store",
"vector_db_id": "test_db",
"vector_store_id": "test_db",
"embedding_model": "test_model",
}
@ -214,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter):
"id": store_id,
"name": "Test Store",
"description": "A test OpenAI vector store",
"vector_db_id": "test_db",
"vector_store_id": "test_db",
"embedding_model": "test_model",
}
@ -229,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter):
"id": store_id,
"name": "Test Store",
"description": "A test OpenAI vector store",
"vector_db_id": "test_db",
"vector_store_id": "test_db",
"embedding_model": "test_model",
}
@ -998,8 +998,8 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
async def test_embedding_config_from_metadata(vector_io_adapter):
"""Test that embedding configuration is correctly extracted from metadata."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
@ -1015,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
await vector_io_adapter.openai_create_vector_store(params)
# Verify VectorDB was registered with correct embedding config from metadata
vector_io_adapter.register_vector_db.assert_called_once()
call_args = vector_io_adapter.register_vector_db.call_args[0][0]
# Verify VectorStore was registered with correct embedding config from metadata
vector_io_adapter.register_vector_store.assert_called_once()
call_args = vector_io_adapter.register_vector_store.call_args[0][0]
assert call_args.embedding_model == "test-embedding-model"
assert call_args.embedding_dimension == 512
@ -1025,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
async def test_embedding_config_from_extra_body(vector_io_adapter):
"""Test that embedding configuration is correctly extracted from extra_body when metadata is empty."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
@ -1042,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
await vector_io_adapter.openai_create_vector_store(params)
# Verify VectorDB was registered with correct embedding config from extra_body
vector_io_adapter.register_vector_db.assert_called_once()
call_args = vector_io_adapter.register_vector_db.call_args[0][0]
# Verify VectorStore was registered with correct embedding config from extra_body
vector_io_adapter.register_vector_store.assert_called_once()
call_args = vector_io_adapter.register_vector_store.call_args[0][0]
assert call_args.embedding_model == "extra-body-model"
assert call_args.embedding_dimension == 1024
@ -1052,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
async def test_embedding_config_consistency_check_passes(vector_io_adapter):
"""Test that consistent embedding config in both metadata and extra_body passes validation."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
@ -1073,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
await vector_io_adapter.openai_create_vector_store(params)
# Should not raise any error and use metadata config
vector_io_adapter.register_vector_db.assert_called_once()
call_args = vector_io_adapter.register_vector_db.call_args[0][0]
vector_io_adapter.register_vector_store.assert_called_once()
call_args = vector_io_adapter.register_vector_store.call_args[0][0]
assert call_args.embedding_model == "consistent-model"
assert call_args.embedding_dimension == 768
@ -1082,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
async def test_embedding_config_inconsistency_errors(vector_io_adapter):
"""Test that inconsistent embedding config between metadata and extra_body raises errors."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
@ -1104,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
await vector_io_adapter.openai_create_vector_store(params)
# Reset mock for second test
vector_io_adapter.register_vector_db.reset_mock()
vector_io_adapter.register_vector_store.reset_mock()
# Test with inconsistent embedding dimension
params = OpenAICreateVectorStoreRequestWithExtraBody(
@ -1126,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
async def test_embedding_config_defaults_when_missing(vector_io_adapter):
"""Test that embedding dimension defaults to 768 when not provided."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
@ -1143,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
await vector_io_adapter.openai_create_vector_store(params)
# Should default to 768 dimensions
vector_io_adapter.register_vector_db.assert_called_once()
call_args = vector_io_adapter.register_vector_db.call_args[0][0]
vector_io_adapter.register_vector_store.assert_called_once()
call_args = vector_io_adapter.register_vector_store.call_args[0][0]
assert call_args.embedding_model == "model-without-dimension"
assert call_args.embedding_dimension == 768
@ -1152,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
async def test_embedding_config_required_model_missing(vector_io_adapter):
"""Test that missing embedding model raises error."""
# Mock register_vector_db to avoid actual registration
vector_io_adapter.register_vector_db = AsyncMock()
# Mock register_vector_store to avoid actual registration
vector_io_adapter.register_vector_store = AsyncMock()
# Set provider_id attribute for the adapter
vector_io_adapter.__provider_id__ = "test_provider"
# Mock the default model lookup to return None (no default model available)

View file

@ -18,7 +18,7 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti
class TestRagQuery:
async def test_query_raises_on_empty_vector_db_ids(self):
async def test_query_raises_on_empty_vector_store_ids(self):
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
)
@ -82,7 +82,7 @@ class TestRagQuery:
with pytest.raises(ValueError):
RAGQueryConfig(mode="wrong_mode")
async def test_query_adds_vector_db_id_to_chunk_metadata(self):
async def test_query_adds_vector_store_id_to_chunk_metadata(self):
rag_tool = MemoryToolRuntimeImpl(
config=MagicMock(),
vector_io_api=MagicMock(),

View file

@ -21,7 +21,7 @@ from llama_stack.apis.tools import RAGDocument
from llama_stack.apis.vector_io import Chunk
from llama_stack.providers.utils.memory.vector_store import (
URL,
VectorDBWithIndex,
VectorStoreWithIndex,
_validate_embedding,
content_from_doc,
make_overlapped_chunks,
@ -206,15 +206,15 @@ class TestVectorStore:
assert str(excinfo.value.__cause__) == "Cannot convert to string"
class TestVectorDBWithIndex:
class TestVectorStoreWithIndex:
async def test_insert_chunks_without_embeddings(self):
mock_vector_db = MagicMock()
mock_vector_db.embedding_model = "test-model without embeddings"
mock_vector_store = MagicMock()
mock_vector_store.embedding_model = "test-model without embeddings"
mock_index = AsyncMock()
mock_inference_api = AsyncMock()
vector_db_with_index = VectorDBWithIndex(
vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
vector_store_with_index = VectorStoreWithIndex(
vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
)
chunks = [
@ -227,7 +227,7 @@ class TestVectorDBWithIndex:
OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
]
await vector_db_with_index.insert_chunks(chunks)
await vector_store_with_index.insert_chunks(chunks)
# Verify openai_embeddings was called with correct params
mock_inference_api.openai_embeddings.assert_called_once()
@ -243,14 +243,14 @@ class TestVectorDBWithIndex:
assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
async def test_insert_chunks_with_valid_embeddings(self):
mock_vector_db = MagicMock()
mock_vector_db.embedding_model = "test-model with embeddings"
mock_vector_db.embedding_dimension = 3
mock_vector_store = MagicMock()
mock_vector_store.embedding_model = "test-model with embeddings"
mock_vector_store.embedding_dimension = 3
mock_index = AsyncMock()
mock_inference_api = AsyncMock()
vector_db_with_index = VectorDBWithIndex(
vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
vector_store_with_index = VectorStoreWithIndex(
vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
)
chunks = [
@ -258,7 +258,7 @@ class TestVectorDBWithIndex:
Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
]
await vector_db_with_index.insert_chunks(chunks)
await vector_store_with_index.insert_chunks(chunks)
mock_inference_api.openai_embeddings.assert_not_called()
mock_index.add_chunks.assert_called_once()
@ -267,14 +267,14 @@ class TestVectorDBWithIndex:
assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
async def test_insert_chunks_with_invalid_embeddings(self):
mock_vector_db = MagicMock()
mock_vector_db.embedding_dimension = 3
mock_vector_db.embedding_model = "test-model with invalid embeddings"
mock_vector_store = MagicMock()
mock_vector_store.embedding_dimension = 3
mock_vector_store.embedding_model = "test-model with invalid embeddings"
mock_index = AsyncMock()
mock_inference_api = AsyncMock()
vector_db_with_index = VectorDBWithIndex(
vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
vector_store_with_index = VectorStoreWithIndex(
vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
)
# Verify Chunk raises ValueError for invalid embedding type
@ -283,7 +283,7 @@ class TestVectorDBWithIndex:
# Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
with pytest.raises(ValueError, match="Input should be a valid list"):
await vector_db_with_index.insert_chunks(
await vector_store_with_index.insert_chunks(
[
Chunk(content="Test 1", embedding=None, metadata={}),
Chunk(content="Test 2", embedding="invalid_type", metadata={}),
@ -292,7 +292,7 @@ class TestVectorDBWithIndex:
# Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
await vector_db_with_index.insert_chunks(
await vector_store_with_index.insert_chunks(
Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
)
@ -300,20 +300,20 @@ class TestVectorDBWithIndex:
Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
]
with pytest.raises(ValueError, match="has dimension 4, expected 3"):
await vector_db_with_index.insert_chunks(chunks_wrong_dim)
await vector_store_with_index.insert_chunks(chunks_wrong_dim)
mock_inference_api.openai_embeddings.assert_not_called()
mock_index.add_chunks.assert_not_called()
async def test_insert_chunks_with_partially_precomputed_embeddings(self):
mock_vector_db = MagicMock()
mock_vector_db.embedding_model = "test-model with partial embeddings"
mock_vector_db.embedding_dimension = 3
mock_vector_store = MagicMock()
mock_vector_store.embedding_model = "test-model with partial embeddings"
mock_vector_store.embedding_dimension = 3
mock_index = AsyncMock()
mock_inference_api = AsyncMock()
vector_db_with_index = VectorDBWithIndex(
vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
vector_store_with_index = VectorStoreWithIndex(
vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
)
chunks = [
@ -327,7 +327,7 @@ class TestVectorDBWithIndex:
OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
]
await vector_db_with_index.insert_chunks(chunks)
await vector_store_with_index.insert_chunks(chunks)
# Verify openai_embeddings was called with correct params
mock_inference_api.openai_embeddings.assert_called_once()

View file

@ -8,8 +8,8 @@
import pytest
from llama_stack.apis.inference import Model
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.core.datatypes import VectorDBWithOwner
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.datatypes import VectorStoreWithOwner
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.core.store.registry import (
KEY_FORMAT,
@ -20,12 +20,12 @@ from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_b
@pytest.fixture
def sample_vector_db():
return VectorDB(
identifier="test_vector_db",
def sample_vector_store():
return VectorStore(
identifier="test_vector_store",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
provider_resource_id="test_vector_db",
provider_resource_id="test_vector_store",
provider_id="test-provider",
)
@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry):
assert result is None
async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model):
print(f"Registering {sample_vector_db}")
await disk_dist_registry.register(sample_vector_db)
async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model):
print(f"Registering {sample_vector_store}")
await disk_dist_registry.register(sample_vector_store)
print(f"Registering {sample_model}")
await disk_dist_registry.register(sample_model)
print("Getting vector_db")
result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db")
assert result_vector_db is not None
assert result_vector_db.identifier == sample_vector_db.identifier
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
assert result_vector_db.provider_id == sample_vector_db.provider_id
print("Getting vector_store")
result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store")
assert result_vector_store is not None
assert result_vector_store.identifier == sample_vector_store.identifier
assert result_vector_store.embedding_model == sample_vector_store.embedding_model
assert result_vector_store.provider_id == sample_vector_store.provider_id
result_model = await disk_dist_registry.get("model", "test_model")
assert result_model is not None
@ -63,11 +63,11 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m
assert result_model.provider_id == sample_model.provider_id
async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model):
async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model):
# First populate the disk registry
disk_registry = DiskDistributionRegistry(sqlite_kvstore)
await disk_registry.initialize()
await disk_registry.register(sample_vector_db)
await disk_registry.register(sample_vector_store)
await disk_registry.register(sample_model)
# Test cached version loads from disk
@ -79,29 +79,29 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
)
await cached_registry.initialize()
result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
assert result_vector_db is not None
assert result_vector_db.identifier == sample_vector_db.identifier
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
assert result_vector_db.provider_id == sample_vector_db.provider_id
result_vector_store = await cached_registry.get("vector_store", "test_vector_store")
assert result_vector_store is not None
assert result_vector_store.identifier == sample_vector_store.identifier
assert result_vector_store.embedding_model == sample_vector_store.embedding_model
assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension
assert result_vector_store.provider_id == sample_vector_store.provider_id
async def test_cached_registry_updates(cached_disk_dist_registry):
new_vector_db = VectorDB(
identifier="test_vector_db_2",
new_vector_store = VectorStore(
identifier="test_vector_store_2",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
provider_resource_id="test_vector_db_2",
provider_resource_id="test_vector_store_2",
provider_id="baz",
)
await cached_disk_dist_registry.register(new_vector_db)
await cached_disk_dist_registry.register(new_vector_store)
# Verify in cache
result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
assert result_vector_db is not None
assert result_vector_db.identifier == new_vector_db.identifier
assert result_vector_db.provider_id == new_vector_db.provider_id
result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
assert result_vector_store is not None
assert result_vector_store.identifier == new_vector_store.identifier
assert result_vector_store.provider_id == new_vector_store.provider_id
# Verify persisted to disk
db_path = cached_disk_dist_registry.kvstore.db_path
@ -111,87 +111,89 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
)
await new_registry.initialize()
result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
assert result_vector_db is not None
assert result_vector_db.identifier == new_vector_db.identifier
assert result_vector_db.provider_id == new_vector_db.provider_id
result_vector_store = await new_registry.get("vector_store", "test_vector_store_2")
assert result_vector_store is not None
assert result_vector_store.identifier == new_vector_store.identifier
assert result_vector_store.provider_id == new_vector_store.provider_id
async def test_duplicate_provider_registration(cached_disk_dist_registry):
original_vector_db = VectorDB(
identifier="test_vector_db_2",
original_vector_store = VectorStore(
identifier="test_vector_store_2",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
provider_resource_id="test_vector_db_2",
provider_resource_id="test_vector_store_2",
provider_id="baz",
)
assert await cached_disk_dist_registry.register(original_vector_db)
assert await cached_disk_dist_registry.register(original_vector_store)
duplicate_vector_db = VectorDB(
identifier="test_vector_db_2",
duplicate_vector_store = VectorStore(
identifier="test_vector_store_2",
embedding_model="different-model",
embedding_dimension=768,
provider_resource_id="test_vector_db_2",
provider_resource_id="test_vector_store_2",
provider_id="baz", # Same provider_id
)
with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"):
await cached_disk_dist_registry.register(duplicate_vector_db)
with pytest.raises(
ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists"
):
await cached_disk_dist_registry.register(duplicate_vector_store)
result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
assert result is not None
assert result.embedding_model == original_vector_db.embedding_model # Original values preserved
assert result.embedding_model == original_vector_store.embedding_model # Original values preserved
async def test_get_all_objects(cached_disk_dist_registry):
# Create multiple test banks
# Create multiple test banks
test_vector_dbs = [
VectorDB(
identifier=f"test_vector_db_{i}",
test_vector_stores = [
VectorStore(
identifier=f"test_vector_store_{i}",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
provider_resource_id=f"test_vector_db_{i}",
provider_resource_id=f"test_vector_store_{i}",
provider_id=f"provider_{i}",
)
for i in range(3)
]
# Register all vector_dbs
for vector_db in test_vector_dbs:
await cached_disk_dist_registry.register(vector_db)
# Register all vector_stores
for vector_store in test_vector_stores:
await cached_disk_dist_registry.register(vector_store)
# Test get_all retrieval
all_results = await cached_disk_dist_registry.get_all()
assert len(all_results) == 3
# Verify each vector_db was stored correctly
for original_vector_db in test_vector_dbs:
matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier]
assert len(matching_vector_dbs) == 1
stored_vector_db = matching_vector_dbs[0]
assert stored_vector_db.embedding_model == original_vector_db.embedding_model
assert stored_vector_db.provider_id == original_vector_db.provider_id
assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension
# Verify each vector_store was stored correctly
for original_vector_store in test_vector_stores:
matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier]
assert len(matching_vector_stores) == 1
stored_vector_store = matching_vector_stores[0]
assert stored_vector_store.embedding_model == original_vector_store.embedding_model
assert stored_vector_store.provider_id == original_vector_store.provider_id
assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension
async def test_parse_registry_values_error_handling(sqlite_kvstore):
valid_db = VectorDB(
identifier="valid_vector_db",
valid_db = VectorStore(
identifier="valid_vector_store",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
provider_resource_id="valid_vector_db",
provider_resource_id="valid_vector_store",
provider_id="test-provider",
)
await sqlite_kvstore.set(
KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json()
KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json()
)
await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json")
await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json")
await sqlite_kvstore.set(
KEY_FORMAT.format(type="vector_db", identifier="missing_fields"),
'{"type": "vector_db", "identifier": "missing_fields"}',
KEY_FORMAT.format(type="vector_store", identifier="missing_fields"),
'{"type": "vector_store", "identifier": "missing_fields"}',
)
test_registry = DiskDistributionRegistry(sqlite_kvstore)
@ -202,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore):
# Should have filtered out the invalid entries
assert len(all_objects) == 1
assert all_objects[0].identifier == "valid_vector_db"
assert all_objects[0].identifier == "valid_vector_store"
# Check that the get method also handles errors correctly
invalid_obj = await test_registry.get("vector_db", "corrupted_json")
invalid_obj = await test_registry.get("vector_store", "corrupted_json")
assert invalid_obj is None
invalid_obj = await test_registry.get("vector_db", "missing_fields")
invalid_obj = await test_registry.get("vector_store", "missing_fields")
assert invalid_obj is None
async def test_cached_registry_error_handling(sqlite_kvstore):
valid_db = VectorDB(
valid_db = VectorStore(
identifier="valid_cached_db",
embedding_model="nomic-embed-text-v1.5",
embedding_dimension=768,
@ -222,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
)
await sqlite_kvstore.set(
KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json()
KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json()
)
await sqlite_kvstore.set(
KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"),
'{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string
KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"),
'{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}', # Should be string
)
cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore)
@ -237,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
assert len(all_objects) == 1
assert all_objects[0].identifier == "valid_cached_db"
invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db")
invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db")
assert invalid_obj is None
async def test_double_registration_identical_objects(disk_dist_registry):
"""Test that registering identical objects succeeds (idempotent)."""
vector_db = VectorDBWithOwner(
identifier="test_vector_db",
vector_store = VectorStoreWithOwner(
identifier="test_vector_store",
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
provider_resource_id="test_vector_db",
provider_resource_id="test_vector_store",
provider_id="test-provider",
)
# First registration should succeed
result1 = await disk_dist_registry.register(vector_db)
result1 = await disk_dist_registry.register(vector_store)
assert result1 is True
# Second registration of identical object should also succeed (idempotent)
result2 = await disk_dist_registry.register(vector_db)
result2 = await disk_dist_registry.register(vector_store)
assert result2 is True
# Verify object exists and is unchanged
retrieved = await disk_dist_registry.get("vector_db", "test_vector_db")
retrieved = await disk_dist_registry.get("vector_store", "test_vector_store")
assert retrieved is not None
assert retrieved.identifier == vector_db.identifier
assert retrieved.embedding_model == vector_db.embedding_model
assert retrieved.identifier == vector_store.identifier
assert retrieved.embedding_model == vector_store.embedding_model
async def test_double_registration_different_objects(disk_dist_registry):
"""Test that registering different objects with same identifier fails."""
vector_db1 = VectorDBWithOwner(
identifier="test_vector_db",
vector_store1 = VectorStoreWithOwner(
identifier="test_vector_store",
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
provider_resource_id="test_vector_db",
provider_resource_id="test_vector_store",
provider_id="test-provider",
)
vector_db2 = VectorDBWithOwner(
identifier="test_vector_db", # Same identifier
vector_store2 = VectorStoreWithOwner(
identifier="test_vector_store", # Same identifier
embedding_model="different-model", # Different embedding model
embedding_dimension=384,
provider_resource_id="test_vector_db",
provider_resource_id="test_vector_store",
provider_id="test-provider",
)
# First registration should succeed
result1 = await disk_dist_registry.register(vector_db1)
result1 = await disk_dist_registry.register(vector_store1)
assert result1 is True
# Second registration with different data should fail
with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"):
await disk_dist_registry.register(vector_db2)
with pytest.raises(
ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists"
):
await disk_dist_registry.register(vector_store2)
# Verify original object is unchanged
retrieved = await disk_dist_registry.get("vector_db", "test_vector_db")
retrieved = await disk_dist_registry.get("vector_store", "test_vector_store")
assert retrieved is not None
assert retrieved.embedding_model == "all-MiniLM-L6-v2" # Original value

View file

@ -41,7 +41,7 @@ class TestTranslateException:
self.identifier = identifier
self.owner = owner
resource = MockResource("vector_db", "test-db")
resource = MockResource("vector_store", "test-db")
exc = AccessDeniedError("create", resource, user)
result = translate_exception(exc)
@ -49,7 +49,7 @@ class TestTranslateException:
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert "test-user" in result.detail
assert "vector_db::test-db" in result.detail
assert "vector_store::test-db" in result.detail
assert "create" in result.detail
assert "roles=['user']" in result.detail
assert "teams=['dev']" in result.detail