chore(cleanup)!: kill vector_db references as far as possible (#3864)

There should not be "vector db" anywhere.
Ashwin Bharambe, 2025-10-20 20:06:16 -07:00, committed by GitHub
parent 444f6c88f3
commit 122de785c4
46 changed files with 701 additions and 822 deletions


@@ -6440,7 +6440,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -9132,7 +9132,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -9440,7 +9440,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -10203,7 +10203,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -11325,7 +11325,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -12652,7 +12652,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark


@@ -32,7 +32,6 @@ Commands:
 scoring_functions Manage scoring functions.
 shields Manage safety shield services.
 toolgroups Manage available tool groups.
-vector_dbs Manage vector databases.
 ```
 ### `llama-stack-client configure`
@@ -211,53 +210,6 @@ Unregister a model from distribution endpoint
 llama-stack-client models unregister <model_id>
 ```
-## Vector DB Management
-Manage vector databases.
-### `llama-stack-client vector_dbs list`
-Show available vector dbs on distribution endpoint
-```bash
-llama-stack-client vector_dbs list
-```
-```
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ identifier               ┃ provider_id ┃ provider_resource_id     ┃ vector_db_type ┃ params                            ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-│ my_demo_vector_db        │ faiss       │ my_demo_vector_db        │                │ embedding_dimension: 768          │
-│                          │             │                          │                │ embedding_model: nomic-embed-text-v1.5 │
-│                          │             │                          │                │ type: vector_db                   │
-│                          │             │                          │                │                                   │
-└──────────────────────────┴─────────────┴──────────────────────────┴────────────────┴───────────────────────────────────┘
-```
-### `llama-stack-client vector_dbs register`
-Create a new vector db
-```bash
-llama-stack-client vector_dbs register <vector-db-id> [--provider-id <provider-id>] [--provider-vector-db-id <provider-vector-db-id>] [--embedding-model <embedding-model>] [--embedding-dimension <embedding-dimension>]
-```
-Required arguments:
-- `VECTOR_DB_ID`: Vector DB ID
-Optional arguments:
-- `--provider-id`: Provider ID for the vector db
-- `--provider-vector-db-id`: Provider's vector db ID
-- `--embedding-model`: Embedding model to use. Default: `nomic-embed-text-v1.5`
-- `--embedding-dimension`: Dimension of embeddings. Default: 768
-### `llama-stack-client vector_dbs unregister`
-Delete a vector db
-```bash
-llama-stack-client vector_dbs unregister <vector-db-id>
-```
-Required arguments:
-- `VECTOR_DB_ID`: Vector DB ID
 ## Shield Management
 Manage safety shield services.
 ### `llama-stack-client shields list`
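The removed `vector_dbs` commands are superseded by the OpenAI-compatible `vector_stores` API. A minimal sketch of the equivalent workflow from the Python client: `vector_stores.list()` appears in this commit's UI code, while `vector_stores.create(...)` is an assumption based on the OpenAI-compatible surface.

```python
# Sketch only: replacement for `vector_dbs register` / `vector_dbs list`.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Roughly replaces `llama-stack-client vector_dbs register` (assumed method).
store = client.vector_stores.create(name="my_demo_vector_store")

# Roughly replaces `llama-stack-client vector_dbs list`; `.identifier` mirrors
# the attribute this commit's UI code reads on listed stores.
for vs in client.vector_stores.list():
    print(vs.identifier)
```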


@@ -5547,7 +5547,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -5798,7 +5798,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",


@@ -4114,7 +4114,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -4303,7 +4303,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark


@@ -1850,7 +1850,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -3983,7 +3983,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",


@@ -1320,7 +1320,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -2927,7 +2927,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark


@@ -6800,7 +6800,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -10205,7 +10205,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -10687,7 +10687,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -11740,7 +11740,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",


@@ -5227,7 +5227,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -7919,7 +7919,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -8227,7 +8227,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -8990,7 +8990,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark


@@ -8472,7 +8472,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -11877,7 +11877,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -12359,7 +12359,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -13412,7 +13412,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -14959,7 +14959,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",
@@ -16704,7 +16704,7 @@
 "enum": [
 "model",
 "shield",
-"vector_db",
+"vector_store",
 "dataset",
 "scoring_function",
 "benchmark",


@@ -6440,7 +6440,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -9132,7 +9132,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -9440,7 +9440,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -10203,7 +10203,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -11325,7 +11325,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark
@@ -12652,7 +12652,7 @@ components:
 enum:
 - model
 - shield
-- vector_db
+- vector_store
 - dataset
 - scoring_function
 - benchmark


@@ -121,7 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     models = "models"
     shields = "shields"
-    vector_dbs = "vector_dbs"  # only used for routing
+    vector_stores = "vector_stores"  # only used for routing table
     datasets = "datasets"
     scoring_functions = "scoring_functions"
     benchmarks = "benchmarks"


@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
 class ResourceType(StrEnum):
     model = "model"
     shield = "shield"
-    vector_db = "vector_db"
+    vector_store = "vector_store"
     dataset = "dataset"
     scoring_function = "scoring_function"
     benchmark = "benchmark"
@@ -34,4 +34,4 @@ class Resource(BaseModel):
     provider_id: str = Field(description="ID of the provider that owns this resource")
-    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
+    type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
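A small illustrative check of the renamed enum member, including the type-qualified identifier form that the access-control change later in this commit relies on; the store name is hypothetical.

```python
from llama_stack.apis.resource import ResourceType

# StrEnum member renamed from vector_db to vector_store.
assert ResourceType.vector_store.value == "vector_store"

# Type-qualified identifier, as matched by AccessRule resources like vector_store::*.
qualified = f"{ResourceType.vector_store.value}::my-store"
```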


@@ -1,93 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Literal, Protocol, runtime_checkable
-
-from pydantic import BaseModel
-
-from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.schema_utils import json_schema_type
-
-
-@json_schema_type
-class VectorDB(Resource):
-    """Vector database resource for storing and querying vector embeddings.
-
-    :param type: Type of resource, always 'vector_db' for vector databases
-    :param embedding_model: Name of the embedding model to use for vector generation
-    :param embedding_dimension: Dimension of the embedding vectors
-    """
-
-    type: Literal[ResourceType.vector_db] = ResourceType.vector_db
-
-    embedding_model: str
-    embedding_dimension: int
-    vector_db_name: str | None = None
-
-    @property
-    def vector_db_id(self) -> str:
-        return self.identifier
-
-    @property
-    def provider_vector_db_id(self) -> str | None:
-        return self.provider_resource_id
-
-
-class VectorDBInput(BaseModel):
-    """Input parameters for creating or configuring a vector database.
-
-    :param vector_db_id: Unique identifier for the vector database
-    :param embedding_model: Name of the embedding model to use for vector generation
-    :param embedding_dimension: Dimension of the embedding vectors
-    :param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
-    """
-
-    vector_db_id: str
-    embedding_model: str
-    embedding_dimension: int
-    provider_id: str | None = None
-    provider_vector_db_id: str | None = None
-
-
-class ListVectorDBsResponse(BaseModel):
-    """Response from listing vector databases.
-
-    :param data: List of vector databases
-    """
-
-    data: list[VectorDB]
-
-
-@runtime_checkable
-class VectorDBs(Protocol):
-    """Internal protocol for vector_dbs routing - no public API endpoints."""
-
-    async def list_vector_dbs(self) -> ListVectorDBsResponse:
-        """Internal method to list vector databases."""
-        ...
-
-    async def get_vector_db(
-        self,
-        vector_db_id: str,
-    ) -> VectorDB:
-        """Internal method to get a vector database by ID."""
-        ...
-
-    async def register_vector_db(
-        self,
-        vector_db_id: str,
-        embedding_model: str,
-        embedding_dimension: int | None = 384,
-        provider_id: str | None = None,
-        vector_db_name: str | None = None,
-        provider_vector_db_id: str | None = None,
-    ) -> VectorDB:
-        """Internal method to register a vector database."""
-        ...
-
-    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        """Internal method to unregister a vector database."""
-        ...


@@ -15,7 +15,7 @@ from fastapi import Body
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
@@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
     total: int
 
+# TODO: rename this as OpenAIVectorStore
 @json_schema_type
 class VectorStoreObject(BaseModel):
     """OpenAI Vector Store object.
@@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="allow"):
     chunking_strategy: VectorStoreChunkingStrategy | None = None
 
-class VectorDBStore(Protocol):
-    def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
+class VectorStoreTable(Protocol):
+    def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
 
 @runtime_checkable
 @trace_protocol
 class VectorIO(Protocol):
-    vector_db_store: VectorDBStore | None = None
+    vector_store_table: VectorStoreTable | None = None
 
     # this will just block now until chunks are inserted, but it should
     # probably return a Job instance which can be polled for completion
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert_chunks(
         self,
@@ -546,6 +548,7 @@ class VectorIO(Protocol):
         """
         ...
 
+    # TODO: rename vector_db_id to vector_store_id once Stainless is working
     @webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
     async def query_chunks(
         self,


@@ -4,4 +4,4 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from .vector_dbs import *
+from .vector_stores import *


@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Literal
+
+from pydantic import BaseModel
+
+from llama_stack.apis.resource import Resource, ResourceType
+
+
+# Internal resource type for storing the vector store routing and other information
+class VectorStore(Resource):
+    """Vector database resource for storing and querying vector embeddings.
+
+    :param type: Type of resource, always 'vector_store' for vector stores
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    """
+
+    type: Literal[ResourceType.vector_store] = ResourceType.vector_store
+
+    embedding_model: str
+    embedding_dimension: int
+    vector_store_name: str | None = None
+
+    @property
+    def vector_store_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_vector_store_id(self) -> str | None:
+        return self.provider_resource_id
+
+
+class VectorStoreInput(BaseModel):
+    """Input parameters for creating or configuring a vector database.
+
+    :param vector_store_id: Unique identifier for the vector store
+    :param embedding_model: Name of the embedding model to use for vector generation
+    :param embedding_dimension: Dimension of the embedding vectors
+    :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    """
+
+    vector_store_id: str
+    embedding_model: str
+    embedding_dimension: int
+    provider_id: str | None = None
+    provider_vector_store_id: str | None = None
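For illustration, a minimal construction of the new internal resource. All field values are hypothetical; `identifier` and `provider_resource_id` come from the `Resource` base class whose diff appears earlier in this commit.

```python
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_stores import VectorStore

vs = VectorStore(
    identifier="vs_1234",                     # hypothetical store id
    provider_id="faiss",                      # hypothetical provider
    provider_resource_id="vs_1234",
    type=ResourceType.vector_store,
    embedding_model="nomic-embed-text-v1.5",
    embedding_dimension=768,
)

# The two properties are aliases over the base Resource fields.
assert vs.vector_store_id == vs.identifier
assert vs.provider_vector_store_id == vs.provider_resource_id
```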


@@ -41,7 +41,7 @@ class AccessRule(BaseModel):
     A rule defines a list of action either to permit or to forbid. It may specify a
     principal or a resource that must match for the rule to take effect. The resource
     to match should be specified in the form of a type qualified identifier, e.g.
-    model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
+    model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
     e.g. model::*. If the principal or resource are not specified, they will match all
     requests.
@@ -79,9 +79,9 @@ class AccessRule(BaseModel):
       description: any user has read access to any resource created by a member of their team
     - forbid:
         actions: [create, read, delete]
-        resource: vector_db::*
+        resource: vector_store::*
       unless: user with admin in roles
-      description: only user with admin role can use vector_db resources
+      description: only user with admin role can use vector_store resources
     """


@@ -23,8 +23,8 @@ from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
 from llama_stack.apis.shields import Shield, ShieldInput
 from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
+from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
@@ -71,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
     pass
 
-class VectorDBWithOwner(VectorDB, ResourceWithOwner):
+class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
     pass
@@ -91,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
     pass
 
-RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
+RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
 
 RoutableObjectWithProvider = Annotated[
     ModelWithOwner
     | ShieldWithOwner
-    | VectorDBWithOwner
+    | VectorStoreWithOwner
     | DatasetWithOwner
     | ScoringFnWithOwner
     | BenchmarkWithOwner
@@ -427,7 +427,7 @@ class RegisteredResources(BaseModel):
     models: list[ModelInput] = Field(default_factory=list)
     shields: list[ShieldInput] = Field(default_factory=list)
-    vector_dbs: list[VectorDBInput] = Field(default_factory=list)
+    vector_stores: list[VectorStoreInput] = Field(default_factory=list)
     datasets: list[DatasetInput] = Field(default_factory=list)
     scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
     benchmarks: list[BenchmarkInput] = Field(default_factory=list)


@@ -64,7 +64,7 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
             router_api=Api.tool_runtime,
         ),
         AutoRoutedApiInfo(
-            routing_table_api=Api.vector_dbs,
+            routing_table_api=Api.vector_stores,
             router_api=Api.vector_io,
         ),
     ]


@@ -29,8 +29,8 @@ from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.core.client import get_client_impl
 from llama_stack.core.datatypes import (
@@ -82,7 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.inspect: Inspect,
         Api.batches: Batches,
         Api.vector_io: VectorIO,
-        Api.vector_dbs: VectorDBs,
+        Api.vector_stores: VectorStore,
         Api.models: Models,
         Api.safety: Safety,
         Api.shields: Shields,


@@ -29,7 +29,7 @@ async def get_routing_table_impl(
     from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
     from ..routing_tables.shields import ShieldsRoutingTable
     from ..routing_tables.toolgroups import ToolGroupsRoutingTable
-    from ..routing_tables.vector_dbs import VectorDBsRoutingTable
+    from ..routing_tables.vector_stores import VectorStoresRoutingTable
 
     api_to_tables = {
         "models": ModelsRoutingTable,
@@ -38,7 +38,7 @@ async def get_routing_table_impl(
         "scoring_functions": ScoringFunctionsRoutingTable,
         "benchmarks": BenchmarksRoutingTable,
         "tool_groups": ToolGroupsRoutingTable,
-        "vector_dbs": VectorDBsRoutingTable,
+        "vector_stores": VectorStoresRoutingTable,
     }
 
     if api.value not in api_to_tables:


@@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime):
         async def query(
             self,
             content: InterleavedContent,
-            vector_db_ids: list[str],
+            vector_store_ids: list[str],
             query_config: RAGQueryConfig | None = None,
         ) -> RAGQueryResult:
-            logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
+            logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
             provider = await self.routing_table.get_provider_impl("knowledge_search")
-            return await provider.query(content, vector_db_ids, query_config)
+            return await provider.query(content, vector_store_ids, query_config)
 
         async def insert(
             self,
             documents: list[RAGDocument],
-            vector_db_id: str,
+            vector_store_id: str,
             chunk_size_in_tokens: int = 512,
         ) -> None:
             logger.debug(
-                f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
+                f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
             )
             provider = await self.routing_table.get_provider_impl("insert_into_memory")
-            return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
+            return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
 
     def __init__(
         self,

@@ -71,25 +71,6 @@ class VectorIORouter(VectorIO):
         raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
 
-    async def register_vector_db(
-        self,
-        vector_db_id: str,
-        embedding_model: str,
-        embedding_dimension: int | None = 384,
-        provider_id: str | None = None,
-        vector_db_name: str | None = None,
-        provider_vector_db_id: str | None = None,
-    ) -> None:
-        logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
-        await self.routing_table.register_vector_db(
-            vector_db_id,
-            embedding_model,
-            embedding_dimension,
-            provider_id,
-            vector_db_name,
-            provider_vector_db_id,
-        )
-
     async def insert_chunks(
         self,
         vector_db_id: str,
@@ -165,22 +146,22 @@ class VectorIORouter(VectorIO):
         else:
             provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
 
-        vector_db_id = f"vs_{uuid.uuid4()}"
-        registered_vector_db = await self.routing_table.register_vector_db(
-            vector_db_id=vector_db_id,
+        vector_store_id = f"vs_{uuid.uuid4()}"
+        registered_vector_store = await self.routing_table.register_vector_store(
+            vector_store_id=vector_store_id,
             embedding_model=embedding_model,
             embedding_dimension=embedding_dimension,
             provider_id=provider_id,
-            provider_vector_db_id=vector_db_id,
-            vector_db_name=params.name,
+            provider_vector_store_id=vector_store_id,
+            vector_store_name=params.name,
         )
-        provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
+        provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
 
-        # Update model_extra with registered values so provider uses the already-registered vector_db
+        # Update model_extra with registered values so provider uses the already-registered vector_store
         if params.model_extra is None:
             params.model_extra = {}
-        params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
-        params.model_extra["provider_id"] = registered_vector_db.provider_id
+        params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
+        params.model_extra["provider_id"] = registered_vector_store.provider_id
         if embedding_model is not None:
             params.model_extra["embedding_model"] = embedding_model
         if embedding_dimension is not None:
@@ -198,15 +179,15 @@ class VectorIORouter(VectorIO):
         logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
         # Route to default provider for now - could aggregate from all providers in the future
         # call retrieve on each vector dbs to get list of vector stores
-        vector_dbs = await self.routing_table.get_all_with_type("vector_db")
+        vector_stores = await self.routing_table.get_all_with_type("vector_store")
         all_stores = []
-        for vector_db in vector_dbs:
+        for vector_store in vector_stores:
             try:
-                provider = await self.routing_table.get_provider_impl(vector_db.identifier)
-                vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
+                provider = await self.routing_table.get_provider_impl(vector_store.identifier)
+                vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
                 all_stores.append(vector_store)
             except Exception as e:
-                logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
+                logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
                 continue
 
         # Sort by created_at

@@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> RoutableObject:
     elif api == Api.safety:
         return await p.register_shield(obj)
     elif api == Api.vector_io:
-        return await p.register_vector_db(obj)
+        return await p.register_vector_store(obj)
     elif api == Api.datasetio:
         return await p.register_dataset(obj)
     elif api == Api.scoring:
@@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> RoutableObject:
 async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
     api = get_impl_api(p)
     if api == Api.vector_io:
-        return await p.unregister_vector_db(obj.identifier)
+        return await p.unregister_vector_store(obj.identifier)
     elif api == Api.inference:
         return await p.unregister_model(obj.identifier)
     elif api == Api.safety:
@@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
             elif api == Api.safety:
                 p.shield_store = self
             elif api == Api.vector_io:
-                p.vector_db_store = self
+                p.vector_store_store = self
             elif api == Api.datasetio:
                 p.dataset_store = self
             elif api == Api.scoring:
@@ -134,15 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
         from .scoring_functions import ScoringFunctionsRoutingTable
         from .shields import ShieldsRoutingTable
         from .toolgroups import ToolGroupsRoutingTable
-        from .vector_dbs import VectorDBsRoutingTable
+        from .vector_stores import VectorStoresRoutingTable
 
         def apiname_object():
             if isinstance(self, ModelsRoutingTable):
                 return ("Inference", "model")
             elif isinstance(self, ShieldsRoutingTable):
                 return ("Safety", "shield")
-            elif isinstance(self, VectorDBsRoutingTable):
-                return ("VectorIO", "vector_db")
+            elif isinstance(self, VectorStoresRoutingTable):
+                return ("VectorIO", "vector_store")
             elif isinstance(self, DatasetsRoutingTable):
                 return ("DatasetIO", "dataset")
             elif isinstance(self, ScoringFunctionsRoutingTable):


@@ -6,15 +6,12 @@
 from typing import Any
 
-from pydantic import TypeAdapter
-
 from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
 from llama_stack.apis.models import ModelType
 from llama_stack.apis.resource import ResourceType
 
-# Removed VectorDBs import to avoid exposing public API
+# Removed VectorStores import to avoid exposing public API
 from llama_stack.apis.vector_io.vector_io import (
-    OpenAICreateVectorStoreRequestWithExtraBody,
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
@@ -26,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import (
     VectorStoreSearchResponsePage,
 )
 from llama_stack.core.datatypes import (
-    VectorDBWithOwner,
+    VectorStoreWithOwner,
 )
 from llama_stack.log import get_logger
@@ -35,23 +32,23 @@ from .common import CommonRoutingTableImpl, lookup_model
 logger = get_logger(name=__name__, category="core::routing_tables")
 
-class VectorDBsRoutingTable(CommonRoutingTableImpl):
-    """Internal routing table for vector_db operations.
+class VectorStoresRoutingTable(CommonRoutingTableImpl):
+    """Internal routing table for vector_store operations.
 
-    Does not inherit from VectorDBs to avoid exposing public API endpoints.
+    Does not inherit from VectorStores to avoid exposing public API endpoints.
     Only provides internal routing functionality for VectorIORouter.
     """
 
     # Internal methods only - no public API exposure
 
-    async def register_vector_db(
+    async def register_vector_store(
         self,
-        vector_db_id: str,
+        vector_store_id: str,
         embedding_model: str,
         embedding_dimension: int | None = 384,
         provider_id: str | None = None,
-        provider_vector_db_id: str | None = None,
-        vector_db_name: str | None = None,
+        provider_vector_store_id: str | None = None,
+        vector_store_name: str | None = None,
     ) -> Any:
         if provider_id is None:
             if len(self.impls_by_provider_id) > 0:
@@ -67,52 +64,24 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
             raise ModelNotFoundError(embedding_model)
         if model.model_type != ModelType.embedding:
             raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
-        if "embedding_dimension" not in model.metadata:
-            raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
-
-        try:
-            provider = self.impls_by_provider_id[provider_id]
-        except KeyError:
-            available_providers = list(self.impls_by_provider_id.keys())
-            raise ValueError(
-                f"Provider '{provider_id}' not found in routing table. Available providers: {available_providers}"
-            ) from None
-
-        logger.warning(
-            "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
-        )
-        request = OpenAICreateVectorStoreRequestWithExtraBody(
-            name=vector_db_name or vector_db_id,
-            embedding_model=embedding_model,
-            embedding_dimension=model.metadata["embedding_dimension"],
-            provider_id=provider_id,
-            provider_vector_db_id=provider_vector_db_id,
-        )
-        vector_store = await provider.openai_create_vector_store(request)
-
-        vector_store_id = vector_store.id
-        actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
-        logger.warning(
-            f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
-        )
-
-        vector_db_data = {
-            "identifier": vector_store_id,
-            "type": ResourceType.vector_db.value,
-            "provider_id": provider_id,
-            "provider_resource_id": actual_provider_vector_db_id,
-            "embedding_model": embedding_model,
-            "embedding_dimension": model.metadata["embedding_dimension"],
-            "vector_db_name": vector_store.name,
-        }
-        vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
-        await self.register_object(vector_db)
-        return vector_db
+        vector_store = VectorStoreWithOwner(
+            identifier=vector_store_id,
+            type=ResourceType.vector_store.value,
+            provider_id=provider_id,
+            provider_resource_id=provider_vector_store_id,
+            embedding_model=embedding_model,
+            embedding_dimension=embedding_dimension,
+            vector_store_name=vector_store_name,
+        )
+        await self.register_object(vector_store)
+        return vector_store
 
     async def openai_retrieve_vector_store(
         self,
         vector_store_id: str,
     ) -> VectorStoreObject:
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store(vector_store_id)
@@ -123,7 +92,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         expires_after: dict[str, Any] | None = None,
         metadata: dict[str, Any] | None = None,
     ) -> VectorStoreObject:
-        await self.assert_action_allowed("update", "vector_db", vector_store_id)
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_update_vector_store(
             vector_store_id=vector_store_id,
@@ -136,18 +105,18 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
     ) -> VectorStoreDeleteResponse:
-        await self.assert_action_allowed("delete", "vector_db", vector_store_id)
+        await self.assert_action_allowed("delete", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         result = await provider.openai_delete_vector_store(vector_store_id)
-        await self.unregister_vector_db(vector_store_id)
+        await self.unregister_vector_store(vector_store_id)
         return result
 
-    async def unregister_vector_db(self, vector_store_id: str) -> None:
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
         """Remove the vector store from the routing table registry."""
         try:
-            vector_db_obj = await self.get_object_by_identifier("vector_db", vector_store_id)
-            if vector_db_obj:
-                await self.unregister_object(vector_db_obj)
+            vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
+            if vector_store_obj:
+                await self.unregister_object(vector_store_obj)
         except Exception as e:
             # Log the error but don't fail the operation
             logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
@@ -162,7 +131,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_search_vector_store(
             vector_store_id=vector_store_id,
@@ -181,7 +150,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         attributes: dict[str, Any] | None = None,
         chunking_strategy: VectorStoreChunkingStrategy | None = None,
     ) -> VectorStoreFileObject:
-        await self.assert_action_allowed("update", "vector_db", vector_store_id)
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_attach_file_to_vector_store(
             vector_store_id=vector_store_id,
@@ -199,7 +168,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         before: str | None = None,
         filter: VectorStoreFileStatus | None = None,
     ) -> list[VectorStoreFileObject]:
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store(
             vector_store_id=vector_store_id,
@@ -215,7 +184,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         vector_store_id: str,
         file_id: str,
     ) -> VectorStoreFileObject:
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file(
             vector_store_id=vector_store_id,
@@ -227,7 +196,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         vector_store_id: str,
         file_id: str,
     ) -> VectorStoreFileContentsResponse:
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
@@ -240,7 +209,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         file_id: str,
         attributes: dict[str, Any],
     ) -> VectorStoreFileObject:
-        await self.assert_action_allowed("update", "vector_db", vector_store_id)
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_update_vector_store_file(
             vector_store_id=vector_store_id,
@@ -253,7 +222,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         vector_store_id: str,
         file_id: str,
     ) -> VectorStoreFileDeleteResponse:
-        await self.assert_action_allowed("delete", "vector_db", vector_store_id)
+        await self.assert_action_allowed("delete", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
        return await provider.openai_delete_vector_store_file(
             vector_store_id=vector_store_id,
@@ -267,7 +236,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         attributes: dict[str, Any] | None = None,
         chunking_strategy: Any | None = None,
     ):
-        await self.assert_action_allowed("update", "vector_db", vector_store_id)
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_create_vector_store_file_batch(
             vector_store_id=vector_store_id,
@@ -281,7 +250,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         batch_id: str,
         vector_store_id: str,
     ):
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_batch(
             batch_id=batch_id,
@@ -298,7 +267,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         limit: int | None = 20,
         order: str | None = "desc",
     ):
-        await self.assert_action_allowed("read", "vector_db", vector_store_id)
+        await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store_file_batch(
             batch_id=batch_id,
@@ -315,7 +284,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl):
         batch_id: str,
         vector_store_id: str,
     ):
-        await self.assert_action_allowed("update", "vector_db", vector_store_id)
+        await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_cancel_vector_store_file_batch(
             batch_id=batch_id,


@@ -32,7 +32,7 @@ def tool_chat_page():
     tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
     mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
     builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
-    selected_vector_dbs = []
+    selected_vector_stores = []
 
     def reset_agent():
         st.session_state.clear()
@@ -55,13 +55,13 @@ def tool_chat_page():
         )
 
         if "builtin::rag" in toolgroup_selection:
-            vector_dbs = llama_stack_api.client.vector_dbs.list() or []
-            if not vector_dbs:
+            vector_stores = llama_stack_api.client.vector_stores.list() or []
+            if not vector_stores:
                 st.info("No vector databases available for selection.")
-            vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
-            selected_vector_dbs = st.multiselect(
+            vector_stores = [vector_store.identifier for vector_store in vector_stores]
+            selected_vector_stores = st.multiselect(
                 label="Select Document Collections to use in RAG queries",
-                options=vector_dbs,
+                options=vector_stores,
                 on_change=reset_agent,
             )
@@ -119,7 +119,7 @@ def tool_chat_page():
             tool_dict = dict(
                 name="builtin::rag",
                 args={
-                    "vector_db_ids": list(selected_vector_dbs),
+                    "vector_store_ids": list(selected_vector_stores),
                 },
             )
             toolgroup_selection[i] = tool_dict


@@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
 from llama_stack.apis.scoring_functions import ScoringFn
 from llama_stack.apis.shields import Shield
 from llama_stack.apis.tools import ToolGroup
-from llama_stack.apis.vector_dbs import VectorDB
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.schema_utils import json_schema_type
@@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
     async def unregister_shield(self, identifier: str) -> None: ...
 
-class VectorDBsProtocolPrivate(Protocol):
-    async def register_vector_db(self, vector_db: VectorDB) -> None: ...
+class VectorStoresProtocolPrivate(Protocol):
+    async def register_vector_store(self, vector_store: VectorStore) -> None: ...
 
-    async def unregister_vector_db(self, vector_db_id: str) -> None: ...
+    async def unregister_vector_store(self, vector_store_id: str) -> None: ...
 
 class DatasetsProtocolPrivate(Protocol):

@ -17,21 +17,21 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorDBsProtocolPrivate from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import FaissVectorIOConfig from .config import FaissVectorIOConfig
logger = get_logger(name=__name__, category="vector_io") logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::" FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
@@ -176,28 +176,28 @@ class FaissIndex(EmbeddingIndex):
        )

-class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
        super().__init__(files_api=files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
-        self.cache: dict[str, VectorDBWithIndex] = {}
+        self.cache: dict[str, VectorStoreWithIndex] = {}

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
        # Load existing banks from kvstore
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
-        stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
-        for vector_db_data in stored_vector_dbs:
-            vector_db = VectorDB.model_validate_json(vector_db_data)
-            index = VectorDBWithIndex(
-                vector_db,
-                await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
+        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
+        for vector_store_data in stored_vector_stores:
+            vector_store = VectorStore.model_validate_json(vector_store_data)
+            index = VectorStoreWithIndex(
+                vector_store,
+                await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
                self.inference_api,
            )
-            self.cache[vector_db.identifier] = index
+            self.cache[vector_store.identifier] = index
        # Load existing OpenAI vector stores into the in-memory cache
        await self.initialize_openai_vector_stores()
@@ -222,32 +222,31 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
        except Exception as e:
            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
        assert self.kvstore is not None
-        key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
-        await self.kvstore.set(key=key, value=vector_db.model_dump_json())
+        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
+        await self.kvstore.set(key=key, value=vector_store.model_dump_json())
        # Store in cache
-        self.cache[vector_db.identifier] = VectorDBWithIndex(
-            vector_db=vector_db,
-            index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
            inference_api=self.inference_api,
        )

-    async def list_vector_dbs(self) -> list[VectorDB]:
-        return [i.vector_db for i in self.cache.values()]
+    async def list_vector_stores(self) -> list[VectorStore]:
+        return [i.vector_store for i in self.cache.values()]

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
        assert self.kvstore is not None
-        if vector_db_id not in self.cache:
-            logger.warning(f"Vector DB {vector_db_id} not found")
+        if vector_store_id not in self.cache:
            return
-        await self.cache[vector_db_id].index.delete()
-        del self.cache[vector_db_id]
-        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
+        await self.cache[vector_store_id].index.delete()
+        del self.cache[vector_store_id]
+        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
        index = self.cache.get(vector_db_id)
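
Every adapter in this commit repeats the same registration shape: serialize the renamed VectorStore model, persist it under a prefixed KV key, and cache a wrapper keyed by its identifier. Below is a minimal, self-contained sketch of that shape; `FakeVectorStore`, `FakeKVStore`, and `MiniAdapter` are hypothetical stand-ins for illustration, not llama_stack classes.

```python
import asyncio
import json
from dataclasses import dataclass

VECTOR_DBS_PREFIX = "vector_stores:v3::"  # illustrative prefix, mirroring the diff


@dataclass
class FakeVectorStore:
    """Stand-in for llama_stack's VectorStore model (hypothetical)."""
    identifier: str
    embedding_dimension: int

    def model_dump_json(self) -> str:
        return json.dumps(self.__dict__)


class FakeKVStore:
    """Dict-backed stand-in for the kvstore API used by the adapters."""
    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    async def set(self, key: str, value: str) -> None:
        self._data[key] = value

    async def delete(self, key: str) -> None:
        self._data.pop(key, None)


class MiniAdapter:
    """Mirrors the register/unregister shape from the Faiss adapter diff."""
    def __init__(self, kvstore: FakeKVStore) -> None:
        self.kvstore = kvstore
        self.cache: dict[str, FakeVectorStore] = {}

    async def register_vector_store(self, vector_store: FakeVectorStore) -> None:
        # Persist metadata under the prefixed key, then cache in memory.
        await self.kvstore.set(
            f"{VECTOR_DBS_PREFIX}{vector_store.identifier}", vector_store.model_dump_json()
        )
        self.cache[vector_store.identifier] = vector_store

    async def unregister_vector_store(self, vector_store_id: str) -> None:
        # Silently ignore unknown ids, as the updated adapter does.
        if vector_store_id not in self.cache:
            return
        del self.cache[vector_store_id]
        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")


async def main() -> None:
    adapter = MiniAdapter(FakeKVStore())
    await adapter.register_vector_store(FakeVectorStore("docs", 768))
    await adapter.unregister_vector_store("docs")

asyncio.run(main())
```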

View file

@@ -17,10 +17,10 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
    RERANKER_TYPE_RRF,
    ChunkForDeletion,
    EmbeddingIndex,
-    VectorDBWithIndex,
+    VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
@@ -41,7 +41,7 @@ HYBRID_SEARCH = "hybrid"
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}

VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
@@ -374,32 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex):
        await asyncio.to_thread(_delete_chunks)

-class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    """
    A VectorIO implementation using SQLite + sqlite_vec.
-    This class handles vector database registration (with metadata stored in a table named `vector_dbs`)
-    and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
+    This class handles vector database registration (with metadata stored in a table named `vector_stores`)
+    and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
    """

    def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
        super().__init__(files_api=files_api, kvstore=None)
        self.config = config
        self.inference_api = inference_api
-        self.cache: dict[str, VectorDBWithIndex] = {}
-        self.vector_db_store = None
+        self.cache: dict[str, VectorStoreWithIndex] = {}
+        self.vector_store_table = None

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
-        stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
-        for db_json in stored_vector_dbs:
-            vector_db = VectorDB.model_validate_json(db_json)
+        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
+        for db_json in stored_vector_stores:
+            vector_store = VectorStore.model_validate_json(db_json)
            index = await SQLiteVecIndex.create(
-                vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
+                vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
            )
-            self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
+            self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
        # Load existing OpenAI vector stores into the in-memory cache
        await self.initialize_openai_vector_stores()
@@ -408,63 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

-    async def list_vector_dbs(self) -> list[VectorDB]:
-        return [v.vector_db for v in self.cache.values()]
+    async def list_vector_stores(self) -> list[VectorStore]:
+        return [v.vector_store for v in self.cache.values()]

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
-        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
-        self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
+        index = await SQLiteVecIndex.create(
+            vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
+        )
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
-        if self.vector_db_store is None:
-            raise VectorStoreNotFoundError(vector_db_id)
-        vector_db = self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise VectorStoreNotFoundError(vector_db_id)
-        index = VectorDBWithIndex(
-            vector_db=vector_db,
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+        if self.vector_store_table is None:
+            raise VectorStoreNotFoundError(vector_store_id)
+        vector_store = self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise VectorStoreNotFoundError(vector_store_id)
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
            index=SQLiteVecIndex(
-                dimension=vector_db.embedding_dimension,
+                dimension=vector_store.embedding_dimension,
                db_path=self.config.db_path,
-                bank_id=vector_db.identifier,
+                bank_id=vector_store.identifier,
                kvstore=self.kvstore,
            ),
            inference_api=self.inference_api,
        )
-        self.cache[vector_db_id] = index
+        self.cache[vector_store_id] = index
        return index

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if vector_db_id not in self.cache:
-            logger.warning(f"Vector DB {vector_db_id} not found")
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        if vector_store_id not in self.cache:
            return
-        await self.cache[vector_db_id].index.delete()
-        del self.cache[vector_db_id]
+        await self.cache[vector_store_id].index.delete()
+        del self.cache[vector_store_id]

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
-        # The VectorDBWithIndex helper is expected to compute embeddings via the inference_api
+        # The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
        # and then call our index's add_chunks.
        await index.insert_chunks(chunks)

    async def query_chunks(
        self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from a sqlite_vec index."""
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise VectorStoreNotFoundError(store_id)
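
The `_get_and_cache_vector_store_index` helper above follows a cache-first pattern: return the cached wrapper if present, otherwise consult the registry table and rebuild the index, raising when neither knows the id. A rough sketch of that control flow, with a plain dict standing in for the registry; all names here are illustrative, not the real llama_stack types.

```python
import asyncio


class VectorStoreNotFoundError(Exception):
    """Stand-in for llama_stack.apis.common.errors.VectorStoreNotFoundError."""


class MiniLookup:
    # Cache-first resolution mirroring _get_and_cache_vector_store_index:
    # hit the in-memory cache, then fall back to the registry table, and
    # raise when the table is missing or has no such store.
    def __init__(self, vector_store_table: dict[str, dict] | None) -> None:
        self.cache: dict[str, dict] = {}
        self.vector_store_table = vector_store_table

    async def get_index(self, vector_store_id: str) -> dict:
        if vector_store_id in self.cache:
            return self.cache[vector_store_id]
        if self.vector_store_table is None:
            raise VectorStoreNotFoundError(vector_store_id)
        vector_store = self.vector_store_table.get(vector_store_id)
        if not vector_store:
            raise VectorStoreNotFoundError(vector_store_id)
        # The real adapter builds a SQLiteVecIndex here; we just cache the record.
        self.cache[vector_store_id] = vector_store
        return vector_store


async def main() -> None:
    lookup = MiniLookup({"docs": {"embedding_dimension": 384}})
    print(await lookup.get_index("docs"))

asyncio.run(main())
```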

View file

@@ -13,15 +13,15 @@ from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex

from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
@@ -30,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI

VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
@@ -114,7 +114,7 @@ class ChromaIndex(EmbeddingIndex):
        raise NotImplementedError("Hybrid search is not supported in Chroma")

-class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    def __init__(
        self,
        config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
@@ -127,11 +127,11 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        self.inference_api = inference_api
        self.client = None
        self.cache = {}
-        self.vector_db_store = None
+        self.vector_store_table = None

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
-        self.vector_db_store = self.kvstore
+        self.vector_store_table = self.kvstore

        if isinstance(self.config, RemoteChromaVectorIOConfig):
            log.info(f"Connecting to Chroma server at: {self.config.url}")
@@ -151,26 +151,26 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
        collection = await maybe_await(
            self.client.get_or_create_collection(
-                name=vector_db.identifier, metadata={"vector_db": vector_db.model_dump_json()}
+                name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
            )
        )
-        self.cache[vector_db.identifier] = VectorDBWithIndex(
-            vector_db, ChromaIndex(self.client, collection), self.inference_api
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store, ChromaIndex(self.client, collection), self.inference_api
        )

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if vector_db_id not in self.cache:
-            log.warning(f"Vector DB {vector_db_id} not found")
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        if vector_store_id not in self.cache:
+            log.warning(f"Vector DB {vector_store_id} not found")
            return
-        await self.cache[vector_db_id].index.delete()
-        del self.cache[vector_db_id]
+        await self.cache[vector_store_id].index.delete()
+        del self.cache[vector_store_id]

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if index is None:
            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
@@ -179,30 +179,30 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
    async def query_chunks(
        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if index is None:
            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
        return await index.query_chunks(query, params)

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
-        collection = await maybe_await(self.client.get_collection(vector_db_id))
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
+        collection = await maybe_await(self.client.get_collection(vector_store_id))
        if not collection:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
-        index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
-        self.cache[vector_db_id] = index
+            raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
+        index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
+        self.cache[vector_store_id] = index
        return index

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from a Chroma vector store."""
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise ValueError(f"Vector DB {store_id} not found")

View file

@@ -14,10 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -26,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
    RERANKER_TYPE_WEIGHTED,
    ChunkForDeletion,
    EmbeddingIndex,
-    VectorDBWithIndex,
+    VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@@ -35,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = get_logger(name=__name__, category="vector_io::milvus")

VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::"
@@ -261,7 +261,7 @@ class MilvusIndex(EmbeddingIndex):
            raise

-class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    def __init__(
        self,
        config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
@@ -273,28 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        self.cache = {}
        self.client = None
        self.inference_api = inference_api
-        self.vector_db_store = None
+        self.vector_store_table = None
        self.metadata_collection_name = "openai_vector_stores_metadata"

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.persistence)
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
-        stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
-        for vector_db_data in stored_vector_dbs:
-            vector_db = VectorDB.model_validate_json(vector_db_data)
-            index = VectorDBWithIndex(
-                vector_db,
+        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
+        for vector_store_data in stored_vector_stores:
+            vector_store = VectorStore.model_validate_json(vector_store_data)
+            index = VectorStoreWithIndex(
+                vector_store,
                index=MilvusIndex(
                    client=self.client,
-                    collection_name=vector_db.identifier,
+                    collection_name=vector_store.identifier,
                    consistency_level=self.config.consistency_level,
                    kvstore=self.kvstore,
                ),
                inference_api=self.inference_api,
            )
-            self.cache[vector_db.identifier] = index
+            self.cache[vector_store.identifier] = index
        if isinstance(self.config, RemoteMilvusVectorIOConfig):
            logger.info(f"Connecting to Milvus server at {self.config.uri}")
            self.client = MilvusClient(**self.config.model_dump(exclude_none=True))
@@ -311,45 +311,45 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
        if isinstance(self.config, RemoteMilvusVectorIOConfig):
            consistency_level = self.config.consistency_level
        else:
            consistency_level = "Strong"
-        index = VectorDBWithIndex(
-            vector_db=vector_db,
-            index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level),
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level),
            inference_api=self.inference_api,
        )
-        self.cache[vector_db.identifier] = index
+        self.cache[vector_store.identifier] = index

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
-        if self.vector_db_store is None:
-            raise VectorStoreNotFoundError(vector_db_id)
-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise VectorStoreNotFoundError(vector_db_id)
-        index = VectorDBWithIndex(
-            vector_db=vector_db,
-            index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore),
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+        if self.vector_store_table is None:
+            raise VectorStoreNotFoundError(vector_store_id)
+        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise VectorStoreNotFoundError(vector_store_id)
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
            inference_api=self.inference_api,
        )
-        self.cache[vector_db_id] = index
+        self.cache[vector_store_id] = index
        return index

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if vector_db_id in self.cache:
-            await self.cache[vector_db_id].index.delete()
-            del self.cache[vector_db_id]
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        if vector_store_id in self.cache:
+            await self.cache[vector_store_id].index.delete()
+            del self.cache[vector_store_id]

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
@@ -358,14 +358,14 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
    async def query_chunks(
        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
        return await index.query_chunks(query, params)

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete a chunk from a milvus vector store."""
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise VectorStoreNotFoundError(store_id)
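
`register_vector_store` keeps the existing consistency-level rule: remote Milvus configs supply their own level, while the inline deployment defaults to "Strong". A sketch of just that branch, with hypothetical config stand-ins in place of the real config classes.

```python
from dataclasses import dataclass


@dataclass
class RemoteConfig:
    """Stand-in for RemoteMilvusVectorIOConfig (hypothetical)."""
    consistency_level: str = "Bounded"


@dataclass
class InlineConfig:
    """Stand-in for InlineMilvusVectorIOConfig (hypothetical)."""


def pick_consistency_level(config: object) -> str:
    # Remote deployments honor the configured level; inline defaults to Strong.
    if isinstance(config, RemoteConfig):
        return config.consistency_level
    return "Strong"


assert pick_consistency_level(RemoteConfig()) == "Bounded"
assert pick_consistency_level(InlineConfig()) == "Strong"
```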

View file

@@ -16,15 +16,15 @@ from pydantic import BaseModel, TypeAdapter
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name

from .config import PGVectorVectorIOConfig
@@ -32,7 +32,7 @@ from .config import PGVectorVectorIOConfig
log = get_logger(name=__name__, category="vector_io::pgvector")

VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:pgvector:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:pgvector:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:pgvector:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:pgvector:{VERSION}::"
@@ -79,13 +79,13 @@ class PGVectorIndex(EmbeddingIndex):
    def __init__(
        self,
-        vector_db: VectorDB,
+        vector_store: VectorStore,
        dimension: int,
        conn: psycopg2.extensions.connection,
        kvstore: KVStore | None = None,
        distance_metric: str = "COSINE",
    ):
-        self.vector_db = vector_db
+        self.vector_store = vector_store
        self.dimension = dimension
        self.conn = conn
        self.kvstore = kvstore
@@ -97,9 +97,9 @@ class PGVectorIndex(EmbeddingIndex):
        try:
            with self.conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
                # Sanitize the table name by replacing hyphens with underscores
-                # SQL doesn't allow hyphens in table names, and vector_db.identifier may contain hyphens
+                # SQL doesn't allow hyphens in table names, and vector_store.identifier may contain hyphens
                # when created with patterns like "test-vector-db-{uuid4()}"
-                sanitized_identifier = sanitize_collection_name(self.vector_db.identifier)
+                sanitized_identifier = sanitize_collection_name(self.vector_store.identifier)
                self.table_name = f"vs_{sanitized_identifier}"

                cur.execute(
@@ -122,8 +122,8 @@ class PGVectorIndex(EmbeddingIndex):
                    """
                )
        except Exception as e:
-            log.exception(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}")
-            raise RuntimeError(f"Error creating PGVectorIndex for vector_db: {self.vector_db.identifier}") from e
+            log.exception(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}")
+            raise RuntimeError(f"Error creating PGVectorIndex for vector_store: {self.vector_store.identifier}") from e

    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
        assert len(chunks) == len(embeddings), (
@@ -323,7 +323,7 @@ class PGVectorIndex(EmbeddingIndex):
    )

-class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    def __init__(
        self, config: PGVectorVectorIOConfig, inference_api: Inference, files_api: Files | None = None
    ) -> None:
@@ -332,7 +332,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
        self.inference_api = inference_api
        self.conn = None
        self.cache = {}
-        self.vector_db_store = None
+        self.vector_store_table = None
        self.metadata_collection_name = "openai_vector_stores_metadata"

    async def initialize(self) -> None:
@@ -375,59 +375,59 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoco
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
        # Persist vector DB metadata in the KV store
        assert self.kvstore is not None
        # Upsert model metadata in Postgres
-        upsert_models(self.conn, [(vector_db.identifier, vector_db)])
+        upsert_models(self.conn, [(vector_store.identifier, vector_store)])
        # Create and cache the PGVector index table for the vector DB
        pgvector_index = PGVectorIndex(
-            vector_db=vector_db, dimension=vector_db.embedding_dimension, conn=self.conn, kvstore=self.kvstore
+            vector_store=vector_store, dimension=vector_store.embedding_dimension, conn=self.conn, kvstore=self.kvstore
        )
        await pgvector_index.initialize()
-        index = VectorDBWithIndex(vector_db, index=pgvector_index, inference_api=self.inference_api)
-        self.cache[vector_db.identifier] = index
+        index = VectorStoreWithIndex(vector_store, index=pgvector_index, inference_api=self.inference_api)
+        self.cache[vector_store.identifier] = index

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
        # Remove provider index and cache
-        if vector_db_id in self.cache:
-            await self.cache[vector_db_id].index.delete()
-            del self.cache[vector_db_id]
+        if vector_store_id in self.cache:
+            await self.cache[vector_store_id].index.delete()
+            del self.cache[vector_store_id]
        # Delete vector DB metadata from KV store
        assert self.kvstore is not None
-        await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_db_id}")
+        await self.kvstore.delete(key=f"{VECTOR_DBS_PREFIX}{vector_store_id}")

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        await index.insert_chunks(chunks)

    async def query_chunks(
        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        return await index.query_chunks(query, params)

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
-        if self.vector_db_store is None:
-            raise VectorStoreNotFoundError(vector_db_id)
-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise VectorStoreNotFoundError(vector_db_id)
-        index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.conn)
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+        if self.vector_store_table is None:
+            raise VectorStoreNotFoundError(vector_store_id)
+        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise VectorStoreNotFoundError(vector_store_id)
+        index = PGVectorIndex(vector_store, vector_store.embedding_dimension, self.conn)
        await index.initialize()
-        self.cache[vector_db_id] = VectorDBWithIndex(vector_db, index, self.inference_api)
-        return self.cache[vector_db_id]
+        self.cache[vector_store_id] = VectorStoreWithIndex(vector_store, index, self.inference_api)
+        return self.cache[vector_store_id]

    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete a chunk from a PostgreSQL vector store."""
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise VectorStoreNotFoundError(store_id)
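
The sanitization comment above survives the rename: Postgres identifiers cannot contain hyphens, so store identifiers such as `test-vector-db-{uuid4()}` must be rewritten before becoming part of the `vs_*` table name. An illustrative version of that cleanup follows; the real implementation is `sanitize_collection_name`, whose exact rules may differ.

```python
import re


def sanitize_identifier(identifier: str) -> str:
    # Replace anything outside [a-zA-Z0-9_] with an underscore. This is an
    # assumption for illustration; sanitize_collection_name may be stricter.
    return re.sub(r"[^a-zA-Z0-9_]", "_", identifier)


table_name = f"vs_{sanitize_identifier('test-vector-db-1234')}"
assert table_name == "vs_test_vector_db_1234"
```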

View file

@@ -16,7 +16,6 @@ from qdrant_client.models import PointStruct
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
    Chunk,
    QueryChunksResponse,
@@ -24,12 +23,13 @@ from llama_stack.apis.vector_io import (
    VectorStoreChunkingStrategy,
    VectorStoreFileObject,
)
+from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
+from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex

from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig
@@ -38,7 +38,7 @@ CHUNK_ID_KEY = "_chunk_id"

# KV store prefixes for vector databases
VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:qdrant:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:qdrant:{VERSION}::"

def convert_id(_id: str) -> str:
@@ -145,7 +145,7 @@ class QdrantIndex(EmbeddingIndex):
        await self.client.delete_collection(collection_name=self.collection_name)

-class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
+class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
    def __init__(
        self,
        config: RemoteQdrantVectorIOConfig | InlineQdrantVectorIOConfig,
@@ -157,7 +157,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        self.client: AsyncQdrantClient = None
        self.cache = {}
        self.inference_api = inference_api
-        self.vector_db_store = None
+        self.vector_store_table = None
        self._qdrant_lock = asyncio.Lock()

    async def initialize(self) -> None:
@@ -167,12 +167,14 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        start_key = VECTOR_DBS_PREFIX
        end_key = f"{VECTOR_DBS_PREFIX}\xff"
-        stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
-        for vector_db_data in stored_vector_dbs:
-            vector_db = VectorDB.model_validate_json(vector_db_data)
-            index = VectorDBWithIndex(vector_db, QdrantIndex(self.client, vector_db.identifier), self.inference_api)
-            self.cache[vector_db.identifier] = index
+        stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
+        for vector_store_data in stored_vector_stores:
+            vector_store = VectorStore.model_validate_json(vector_store_data)
+            index = VectorStoreWithIndex(
+                vector_store, QdrantIndex(self.client, vector_store.identifier), self.inference_api
+            )
+            self.cache[vector_store.identifier] = index
        self.openai_vector_stores = await self._load_openai_vector_stores()

    async def shutdown(self) -> None:
@@ -180,46 +182,48 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        # Clean up mixin resources (file batch tasks)
        await super().shutdown()

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
        assert self.kvstore is not None
-        key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
-        await self.kvstore.set(key=key, value=vector_db.model_dump_json())
+        key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
+        await self.kvstore.set(key=key, value=vector_store.model_dump_json())

-        index = VectorDBWithIndex(
-            vector_db=vector_db, index=QdrantIndex(self.client, vector_db.identifier), inference_api=self.inference_api
-        )
-        self.cache[vector_db.identifier] = index
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=QdrantIndex(self.client, vector_store.identifier),
+            inference_api=self.inference_api,
+        )
+        self.cache[vector_store.identifier] = index

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if vector_db_id in self.cache:
-            await self.cache[vector_db_id].index.delete()
-            del self.cache[vector_db_id]
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
+        if vector_store_id in self.cache:
+            await self.cache[vector_store_id].index.delete()
+            del self.cache[vector_store_id]
        assert self.kvstore is not None
-        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
+        await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
-        if self.vector_db_store is None:
-            raise ValueError(f"Vector DB not found {vector_db_id}")
-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise VectorStoreNotFoundError(vector_db_id)
-        index = VectorDBWithIndex(
-            vector_db=vector_db,
-            index=QdrantIndex(client=self.client, collection_name=vector_db.identifier),
-            inference_api=self.inference_api,
-        )
-        self.cache[vector_db_id] = index
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]
+        if self.vector_store_table is None:
+            raise ValueError(f"Vector DB not found {vector_store_id}")
+        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise VectorStoreNotFoundError(vector_store_id)
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=QdrantIndex(client=self.client, collection_name=vector_store.identifier),
+            inference_api=self.inference_api,
+        )
+        self.cache[vector_store_id] = index
        return index

    async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
@@ -228,7 +232,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
    async def query_chunks(
        self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
    ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
        if not index:
            raise VectorStoreNotFoundError(vector_db_id)
@@ -249,7 +253,7 @@ class QdrantVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
    async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
        """Delete chunks from a Qdrant vector store."""
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
        if not index:
            raise ValueError(f"Vector DB {store_id} not found")

View file

@ -16,11 +16,11 @@ from llama_stack.apis.common.content_types import InterleavedContent
from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference from llama_stack.apis.inference import Inference
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -28,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF, RERANKER_TYPE_RRF,
ChunkForDeletion, ChunkForDeletion,
EmbeddingIndex, EmbeddingIndex,
VectorDBWithIndex, VectorStoreWithIndex,
) )
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@ -37,7 +37,7 @@ from .config import WeaviateVectorIOConfig
log = get_logger(name=__name__, category="vector_io::weaviate") log = get_logger(name=__name__, category="vector_io::weaviate")
VERSION = "v3" VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:weaviate:{VERSION}::" VECTOR_DBS_PREFIX = f"vector_stores:weaviate:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::" VECTOR_INDEX_PREFIX = f"vector_index:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:weaviate:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::" OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:weaviate:{VERSION}::"
@ -257,14 +257,14 @@ class WeaviateIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores) return QueryChunksResponse(chunks=chunks, scores=scores)
class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorDBsProtocolPrivate): class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProviderData, VectorStoresProtocolPrivate):
def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None: def __init__(self, config: WeaviateVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None) super().__init__(files_api=files_api, kvstore=None)
self.config = config self.config = config
self.inference_api = inference_api self.inference_api = inference_api
self.client_cache = {} self.client_cache = {}
self.cache = {} self.cache = {}
self.vector_db_store = None self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata" self.metadata_collection_name = "openai_vector_stores_metadata"
def _get_client(self) -> weaviate.WeaviateClient: def _get_client(self) -> weaviate.WeaviateClient:
@ -300,11 +300,11 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
end_key = f"{VECTOR_DBS_PREFIX}\xff" end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored = await self.kvstore.values_in_range(start_key, end_key) stored = await self.kvstore.values_in_range(start_key, end_key)
for raw in stored: for raw in stored:
vector_db = VectorDB.model_validate_json(raw) vector_store = VectorStore.model_validate_json(raw)
client = self._get_client() client = self._get_client()
idx = WeaviateIndex(client=client, collection_name=vector_db.identifier, kvstore=self.kvstore) idx = WeaviateIndex(client=client, collection_name=vector_store.identifier, kvstore=self.kvstore)
-        self.cache[vector_db.identifier] = VectorDBWithIndex(
-            vector_db=vector_db, index=idx, inference_api=self.inference_api
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store=vector_store, index=idx, inference_api=self.inference_api
         )
         # Load OpenAI vector stores metadata into cache
@@ -316,9 +316,9 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
         # Clean up mixin resources (file batch tasks)
         await super().shutdown()

-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
         client = self._get_client()
-        sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
+        sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
         # Create collection if it doesn't exist
         if not client.collections.exists(sanitized_collection_name):
             client.collections.create(
@@ -329,45 +329,45 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
                 ],
             )
-        self.cache[vector_db.identifier] = VectorDBWithIndex(
-            vector_db, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
+        self.cache[vector_store.identifier] = VectorStoreWithIndex(
+            vector_store, WeaviateIndex(client=client, collection_name=sanitized_collection_name), self.inference_api
         )

-    async def unregister_vector_db(self, vector_db_id: str) -> None:
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
         client = self._get_client()
-        sanitized_collection_name = sanitize_collection_name(vector_db_id, weaviate_format=True)
-        if vector_db_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
+        sanitized_collection_name = sanitize_collection_name(vector_store_id, weaviate_format=True)
+        if vector_store_id not in self.cache or client.collections.exists(sanitized_collection_name) is False:
             return
         client.collections.delete(sanitized_collection_name)
-        await self.cache[vector_db_id].index.delete()
-        del self.cache[vector_db_id]
+        await self.cache[vector_store_id].index.delete()
+        del self.cache[vector_store_id]

-    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
-        if vector_db_id in self.cache:
-            return self.cache[vector_db_id]
+    async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
+        if vector_store_id in self.cache:
+            return self.cache[vector_store_id]

-        if self.vector_db_store is None:
-            raise VectorStoreNotFoundError(vector_db_id)
+        if self.vector_store_table is None:
+            raise VectorStoreNotFoundError(vector_store_id)

-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise VectorStoreNotFoundError(vector_db_id)
+        vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
+        if not vector_store:
+            raise VectorStoreNotFoundError(vector_store_id)

         client = self._get_client()
-        sanitized_collection_name = sanitize_collection_name(vector_db.identifier, weaviate_format=True)
+        sanitized_collection_name = sanitize_collection_name(vector_store.identifier, weaviate_format=True)
         if not client.collections.exists(sanitized_collection_name):
             raise ValueError(f"Collection with name `{sanitized_collection_name}` not found")
-        index = VectorDBWithIndex(
-            vector_db=vector_db,
-            index=WeaviateIndex(client=client, collection_name=vector_db.identifier),
+        index = VectorStoreWithIndex(
+            vector_store=vector_store,
+            index=WeaviateIndex(client=client, collection_name=vector_store.identifier),
             inference_api=self.inference_api,
         )
-        self.cache[vector_db_id] = index
+        self.cache[vector_store_id] = index
         return index

     async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
         if not index:
             raise VectorStoreNotFoundError(vector_db_id)
@@ -376,14 +376,14 @@ class WeaviateVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, NeedsRequestProv
     async def query_chunks(
         self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
     ) -> QueryChunksResponse:
-        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        index = await self._get_and_cache_vector_store_index(vector_db_id)
         if not index:
             raise VectorStoreNotFoundError(vector_db_id)
         return await index.query_chunks(query, params)

     async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
-        index = await self._get_and_cache_vector_db_index(store_id)
+        index = await self._get_and_cache_vector_store_index(store_id)
         if not index:
             raise ValueError(f"Vector DB {store_id} not found")
View file
@@ -17,7 +17,6 @@ from pydantic import TypeAdapter
 from llama_stack.apis.common.errors import VectorStoreNotFoundError
 from llama_stack.apis.files import Files, OpenAIFileObject
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -43,6 +42,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreSearchResponse,
     VectorStoreSearchResponsePage,
 )
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore.api import KVStore
@@ -63,7 +63,7 @@ MAX_CONCURRENT_FILES_PER_BATCH = 3  # Maximum concurrent file processing within
 FILE_BATCH_CHUNK_SIZE = 10  # Process files in chunks of this size

 VERSION = "v3"
-VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
+VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
 OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
 OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
 OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_contents:{VERSION}::"
@@ -321,12 +321,12 @@ class OpenAIVectorStoreMixin(ABC):
         pass

     @abstractmethod
-    async def register_vector_db(self, vector_db: VectorDB) -> None:
+    async def register_vector_store(self, vector_store: VectorStore) -> None:
         """Register a vector database (provider-specific implementation)."""
         pass

     @abstractmethod
-    async def unregister_vector_db(self, vector_db_id: str) -> None:
+    async def unregister_vector_store(self, vector_store_id: str) -> None:
         """Unregister a vector database (provider-specific implementation)."""
         pass
@@ -358,7 +358,7 @@ class OpenAIVectorStoreMixin(ABC):
         extra_body = params.model_extra or {}
         metadata = params.metadata or {}
-        provider_vector_db_id = extra_body.get("provider_vector_db_id")
+        provider_vector_store_id = extra_body.get("provider_vector_store_id")

         # Use embedding info from metadata if available, otherwise from extra_body
         if metadata.get("embedding_model"):
@@ -389,8 +389,8 @@ class OpenAIVectorStoreMixin(ABC):
         # use provider_id set by router; fallback to provider's own ID when used directly via --stack-config
         provider_id = extra_body.get("provider_id") or getattr(self, "__provider_id__", None)

-        # Derive the canonical vector_db_id (allow override, else generate)
-        vector_db_id = provider_vector_db_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")
+        # Derive the canonical vector_store_id (allow override, else generate)
+        vector_store_id = provider_vector_store_id or generate_object_id("vector_store", lambda: f"vs_{uuid.uuid4()}")

         if embedding_model is None:
             raise ValueError("embedding_model is required")
@@ -398,19 +398,20 @@ class OpenAIVectorStoreMixin(ABC):
         if embedding_dimension is None:
             raise ValueError("Embedding dimension is required")

-        # Register the VectorDB backing this vector store
+        # Register the VectorStore backing this vector store
         if provider_id is None:
             raise ValueError("Provider ID is required but was not provided")

-        vector_db = VectorDB(
-            identifier=vector_db_id,
+        # call to the provider to create any index, etc.
+        vector_store = VectorStore(
+            identifier=vector_store_id,
             embedding_dimension=embedding_dimension,
             embedding_model=embedding_model,
             provider_id=provider_id,
-            provider_resource_id=vector_db_id,
-            vector_db_name=params.name,
+            provider_resource_id=vector_store_id,
+            vector_store_name=params.name,
         )
-        await self.register_vector_db(vector_db)
+        await self.register_vector_store(vector_store)

         # Create OpenAI vector store metadata
         status = "completed"
@@ -424,7 +425,7 @@ class OpenAIVectorStoreMixin(ABC):
             total=0,
         )
         store_info: dict[str, Any] = {
-            "id": vector_db_id,
+            "id": vector_store_id,
             "object": "vector_store",
             "created_at": created_at,
             "name": params.name,
@@ -441,23 +442,23 @@ class OpenAIVectorStoreMixin(ABC):
         # Add provider information to metadata if provided
         if provider_id:
             metadata["provider_id"] = provider_id
-        if provider_vector_db_id:
-            metadata["provider_vector_db_id"] = provider_vector_db_id
+        if provider_vector_store_id:
+            metadata["provider_vector_store_id"] = provider_vector_store_id
         store_info["metadata"] = metadata

         # Save to persistent storage (provider-specific)
-        await self._save_openai_vector_store(vector_db_id, store_info)
+        await self._save_openai_vector_store(vector_store_id, store_info)

         # Store in memory cache
-        self.openai_vector_stores[vector_db_id] = store_info
+        self.openai_vector_stores[vector_store_id] = store_info

         # Now that our vector store is created, attach any files that were provided
         file_ids = params.file_ids or []
-        tasks = [self.openai_attach_file_to_vector_store(vector_db_id, file_id) for file_id in file_ids]
+        tasks = [self.openai_attach_file_to_vector_store(vector_store_id, file_id) for file_id in file_ids]
         await asyncio.gather(*tasks)

         # Get the updated store info and return it
-        store_info = self.openai_vector_stores[vector_db_id]
+        store_info = self.openai_vector_stores[vector_store_id]
         return VectorStoreObject.model_validate(store_info)

     async def openai_list_vector_stores(
@@ -567,7 +568,7 @@ class OpenAIVectorStoreMixin(ABC):
         # Also delete the underlying vector DB
         try:
-            await self.unregister_vector_db(vector_store_id)
+            await self.unregister_vector_store(vector_store_id)
         except Exception as e:
             logger.warning(f"Failed to delete underlying vector DB {vector_store_id}: {e}")
View file
@@ -23,8 +23,8 @@ from llama_stack.apis.common.content_types import (
 )
 from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody
 from llama_stack.apis.tools import RAGDocument
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import Api
@@ -187,7 +187,7 @@ def make_overlapped_chunks(
             updated_timestamp=int(time.time()),
             chunk_window=chunk_window,
             chunk_tokenizer=default_tokenizer,
-            chunk_embedding_model=None,  # This will be set in `VectorDBWithIndex.insert_chunks`
+            chunk_embedding_model=None,  # This will be set in `VectorStoreWithIndex.insert_chunks`
             content_token_count=len(toks),
             metadata_token_count=len(metadata_tokens),
         )
@@ -255,8 +255,8 @@ class EmbeddingIndex(ABC):

 @dataclass
-class VectorDBWithIndex:
-    vector_db: VectorDB
+class VectorStoreWithIndex:
+    vector_store: VectorStore
     index: EmbeddingIndex
     inference_api: Api.inference
@@ -269,14 +269,14 @@ class VectorDBWithIndex:
             if c.embedding is None:
                 chunks_to_embed.append(c)
                 if c.chunk_metadata:
-                    c.chunk_metadata.chunk_embedding_model = self.vector_db.embedding_model
-                    c.chunk_metadata.chunk_embedding_dimension = self.vector_db.embedding_dimension
+                    c.chunk_metadata.chunk_embedding_model = self.vector_store.embedding_model
+                    c.chunk_metadata.chunk_embedding_dimension = self.vector_store.embedding_dimension
             else:
-                _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
+                _validate_embedding(c.embedding, i, self.vector_store.embedding_dimension)

         if chunks_to_embed:
             params = OpenAIEmbeddingsRequestWithExtraBody(
-                model=self.vector_db.embedding_model,
+                model=self.vector_store.embedding_model,
                 input=[c.content for c in chunks_to_embed],
             )
             resp = await self.inference_api.openai_embeddings(params)
@@ -319,7 +319,7 @@ class VectorDBWithIndex:
             return await self.index.query_keyword(query_string, k, score_threshold)

         params = OpenAIEmbeddingsRequestWithExtraBody(
-            model=self.vector_db.embedding_model,
+            model=self.vector_store.embedding_model,
             input=[query_string],
         )
         embeddings_response = await self.inference_api.openai_embeddings(params)
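The dataclass rename is mechanical, but the wrapper's behavior is worth restating: chunks inserted without embeddings are embedded with the store's configured model at insert time. A minimal sketch, where the index and inference handles are stand-ins for whatever the hosting provider supplies:

```python
# Illustrative only; `embedding_index` and `inference_api` are assumed to be
# provided by the hosting provider (e.g. a FaissIndex and the inference impl).
from llama_stack.apis.vector_io import Chunk
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.providers.utils.memory.vector_store import VectorStoreWithIndex


async def index_one(embedding_index, inference_api) -> None:
    wrapper = VectorStoreWithIndex(
        vector_store=VectorStore(
            identifier="vs_demo",
            provider_id="inline::faiss",
            embedding_model="nomic-embed-text-v1.5",
            embedding_dimension=768,
        ),
        index=embedding_index,
        inference_api=inference_api,
    )
    # No embedding supplied, so insert_chunks() calls openai_embeddings with
    # the store's embedding_model before handing vectors to the index.
    await wrapper.insert_chunks([Chunk(content="Paris is the capital of France.", metadata={})])
```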
View file
@@ -37,6 +37,9 @@ def pytest_sessionstart(session):
     if "LLAMA_STACK_TEST_INFERENCE_MODE" not in os.environ:
         os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"

+    if "LLAMA_STACK_LOGGING" not in os.environ:
+        os.environ["LLAMA_STACK_LOGGING"] = "all=warning"
+
     if "SQLITE_STORE_DIR" not in os.environ:
         os.environ["SQLITE_STORE_DIR"] = tempfile.mkdtemp()
View file
@@ -49,46 +49,50 @@ def client_with_empty_registry(client_with_models):

 @vector_provider_wrapper
-def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
-    vector_db_name = "test_vector_db"
+def test_vector_store_retrieve(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    vector_store_name = "test_vector_store"
     create_response = client_with_empty_registry.vector_stores.create(
-        name=vector_db_name,
+        name=vector_store_name,
         extra_body={
             "provider_id": vector_io_provider_id,
         },
     )
-    actual_vector_db_id = create_response.id
+    actual_vector_store_id = create_response.id

     # Retrieve the vector store and validate its properties
-    response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_db_id)
+    response = client_with_empty_registry.vector_stores.retrieve(vector_store_id=actual_vector_store_id)
     assert response is not None
-    assert response.id == actual_vector_db_id
-    assert response.name == vector_db_name
+    assert response.id == actual_vector_store_id
+    assert response.name == vector_store_name
     assert response.id.startswith("vs_")

 @vector_provider_wrapper
-def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id):
-    vector_db_name = "test_vector_db"
+def test_vector_store_register(
+    client_with_empty_registry, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    vector_store_name = "test_vector_store"
     response = client_with_empty_registry.vector_stores.create(
-        name=vector_db_name,
+        name=vector_store_name,
         extra_body={
             "provider_id": vector_io_provider_id,
         },
     )
-    actual_vector_db_id = response.id
-    assert actual_vector_db_id.startswith("vs_")
-    assert actual_vector_db_id != vector_db_name
+    actual_vector_store_id = response.id
+    assert actual_vector_store_id.startswith("vs_")
+    assert actual_vector_store_id != vector_store_name

     vector_stores = client_with_empty_registry.vector_stores.list()
     assert len(vector_stores.data) == 1
     vector_store = vector_stores.data[0]
-    assert vector_store.id == actual_vector_db_id
-    assert vector_store.name == vector_db_name
+    assert vector_store.id == actual_vector_store_id
+    assert vector_store.name == vector_store_name

-    client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_db_id)
+    client_with_empty_registry.vector_stores.delete(vector_store_id=actual_vector_store_id)
     vector_stores = client_with_empty_registry.vector_stores.list()
     assert len(vector_stores.data) == 0
@@ -108,23 +112,23 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
 def test_insert_chunks(
     client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case, vector_io_provider_id
 ):
-    vector_db_name = "test_vector_db"
+    vector_store_name = "test_vector_store"
     create_response = client_with_empty_registry.vector_stores.create(
-        name=vector_db_name,
+        name=vector_store_name,
         extra_body={
             "provider_id": vector_io_provider_id,
         },
     )
-    actual_vector_db_id = create_response.id
+    actual_vector_store_id = create_response.id

     client_with_empty_registry.vector_io.insert(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         chunks=sample_chunks,
     )

     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         query="What is the capital of France?",
     )
     assert response is not None
@@ -133,7 +137,7 @@ def test_insert_chunks(
     query, expected_doc_id = test_case
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         query=query,
     )
     assert response is not None
@@ -151,15 +155,15 @@ def test_insert_chunks_with_precomputed_embeddings(
         "inline::qdrant": {"score_threshold": -1.0},
         "remote::qdrant": {"score_threshold": -1.0},
     }
-    vector_db_name = "test_precomputed_embeddings_db"
+    vector_store_name = "test_precomputed_embeddings_db"
     register_response = client_with_empty_registry.vector_stores.create(
-        name=vector_db_name,
+        name=vector_store_name,
         extra_body={
             "provider_id": vector_io_provider_id,
         },
     )
-    actual_vector_db_id = register_response.id
+    actual_vector_store_id = register_response.id

     chunks_with_embeddings = [
         Chunk(
@@ -170,13 +174,13 @@ def test_insert_chunks_with_precomputed_embeddings(
     ]

     client_with_empty_registry.vector_io.insert(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         chunks=chunks_with_embeddings,
     )

     provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         query="precomputed embedding test",
         params=vector_io_provider_params_dict.get(provider, None),
     )
@@ -200,16 +204,16 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
         "remote::qdrant": {"score_threshold": 0.0},
         "inline::qdrant": {"score_threshold": 0.0},
     }
-    vector_db_name = "test_precomputed_embeddings_db"
+    vector_store_name = "test_precomputed_embeddings_db"
     register_response = client_with_empty_registry.vector_stores.create(
-        name=vector_db_name,
+        name=vector_store_name,
         extra_body={
             "embedding_model": embedding_model_id,
             "provider_id": vector_io_provider_id,
         },
     )
-    actual_vector_db_id = register_response.id
+    actual_vector_store_id = register_response.id

     chunks_with_embeddings = [
         Chunk(
@@ -220,13 +224,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
     ]

     client_with_empty_registry.vector_io.insert(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         chunks=chunks_with_embeddings,
     )

     provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
     response = client_with_empty_registry.vector_io.query(
-        vector_db_id=actual_vector_db_id,
+        vector_db_id=actual_vector_store_id,
         query="duplicate",
         params=vector_io_provider_params_dict.get(provider, None),
     )
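One asymmetry these tests make visible: the control plane now speaks `vector_store`, while the data-plane `vector_io.insert`/`vector_io.query` calls still take a `vector_db_id` keyword, carrying the new `vs_...` id. A condensed round trip, with a placeholder URL and an illustrative chunk payload:

```python
# Sketch only; the chunk shape and stack URL are assumptions for illustration.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
store = client.vector_stores.create(name="roundtrip", extra_body={"provider_id": "inline::faiss"})

client.vector_io.insert(
    vector_db_id=store.id,  # parameter name unchanged; the value is a vector store id
    chunks=[{"content": "Paris is the capital of France.", "metadata": {"document_id": "doc1"}}],
)
response = client.vector_io.query(vector_db_id=store.id, query="What is the capital of France?")
```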
View file
@@ -21,7 +21,7 @@ async def test_single_provider_auto_selection():
             Mock(identifier="all-MiniLM-L6-v2", model_type="embedding", metadata={"embedding_dimension": 384})
         ]
     )
-    mock_routing_table.register_vector_db = AsyncMock(
+    mock_routing_table.register_vector_store = AsyncMock(
         return_value=Mock(identifier="vs_123", provider_id="inline::faiss", provider_resource_id="vs_123")
     )
     mock_routing_table.get_provider_impl = AsyncMock(
View file
@@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import numpy as np
 import pytest

-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
@@ -31,7 +31,7 @@ def vector_provider(request):

 @pytest.fixture
-def vector_db_id() -> str:
+def vector_store_id() -> str:
     return f"test-vector-db-{random.randint(1, 100)}"
@@ -149,8 +149,8 @@ async def sqlite_vec_adapter(sqlite_vec_db_path, unique_kvstore_config, mock_inf
     )
     collection_id = f"sqlite_test_collection_{np.random.randint(1e6)}"
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=collection_id,
             provider_id="test_provider",
             embedding_model="test_model",
@@ -186,8 +186,8 @@ async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding
         files_api=None,
     )
     await adapter.initialize()
-    await adapter.register_vector_db(
-        VectorDB(
+    await adapter.register_vector_store(
+        VectorStore(
             identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
             provider_id="test_provider",
             embedding_model="test_model",
@@ -215,7 +215,7 @@ def mock_psycopg2_connection():
 async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     connection, cursor = mock_psycopg2_connection

-    vector_db = VectorDB(
+    vector_store = VectorStore(
         identifier="test-vector-db",
         embedding_model="test-model",
         embedding_dimension=embedding_dimension,
@@ -225,7 +225,7 @@ async def pgvector_vec_index(embedding_dimension, mock_psycopg2_connection):
     with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.psycopg2"):
         with patch("llama_stack.providers.remote.vector_io.pgvector.pgvector.execute_values"):
-            index = PGVectorIndex(vector_db, embedding_dimension, connection, distance_metric="COSINE")
+            index = PGVectorIndex(vector_store, embedding_dimension, connection, distance_metric="COSINE")
             index._test_chunks = []
             original_add_chunks = index.add_chunks
@@ -281,30 +281,30 @@ async def pgvector_vec_adapter(unique_kvstore_config, mock_inference_api, embedd
     await adapter.initialize()
     adapter.conn = mock_conn

-    async def mock_insert_chunks(vector_db_id, chunks, ttl_seconds=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_insert_chunks(vector_store_id, chunks, ttl_seconds=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         await index.insert_chunks(chunks)

     adapter.insert_chunks = mock_insert_chunks

-    async def mock_query_chunks(vector_db_id, query, params=None):
-        index = await adapter._get_and_cache_vector_db_index(vector_db_id)
+    async def mock_query_chunks(vector_store_id, query, params=None):
+        index = await adapter._get_and_cache_vector_store_index(vector_store_id)
         if not index:
-            raise ValueError(f"Vector DB {vector_db_id} not found")
+            raise ValueError(f"Vector DB {vector_store_id} not found")
         return await index.query_chunks(query, params)

     adapter.query_chunks = mock_query_chunks

-    test_vector_db = VectorDB(
+    test_vector_store = VectorStore(
         identifier=f"pgvector_test_collection_{random.randint(1, 1_000_000)}",
         provider_id="test_provider",
         embedding_model="test_model",
         embedding_dimension=embedding_dimension,
     )
-    await adapter.register_vector_db(test_vector_db)
-    adapter.test_collection_id = test_vector_db.identifier
+    await adapter.register_vector_store(test_vector_store)
+    adapter.test_collection_id = test_vector_store.identifier

     yield adapter
     await adapter.shutdown()
View file
@@ -11,8 +11,8 @@ import numpy as np
 import pytest

 from llama_stack.apis.files import Files
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
@@ -43,8 +43,8 @@ def embedding_dimension():

 @pytest.fixture
-def vector_db_id():
-    return "test_vector_db"
+def vector_store_id():
+    return "test_vector_store"

 @pytest.fixture
@@ -61,12 +61,12 @@ def sample_embeddings(embedding_dimension):

 @pytest.fixture
-def mock_vector_db(vector_db_id, embedding_dimension) -> MagicMock:
-    mock_vector_db = MagicMock(spec=VectorDB)
-    mock_vector_db.embedding_model = "mock_embedding_model"
-    mock_vector_db.identifier = vector_db_id
-    mock_vector_db.embedding_dimension = embedding_dimension
-    return mock_vector_db
+def mock_vector_store(vector_store_id, embedding_dimension) -> MagicMock:
+    mock_vector_store = MagicMock(spec=VectorStore)
+    mock_vector_store.embedding_model = "mock_embedding_model"
+    mock_vector_store.identifier = vector_store_id
+    mock_vector_store.embedding_dimension = embedding_dimension
+    return mock_vector_store

 @pytest.fixture
View file
@@ -12,7 +12,6 @@ import numpy as np
 import pytest

 from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -21,6 +20,7 @@ from llama_stack.apis.vector_io import (
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
 )
+from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX

 # This test is a unit test for the inline VectorIO providers. This should only contain
@@ -71,7 +71,7 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
 async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
     key = f"{VECTOR_DBS_PREFIX}db1"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
     await vector_io_adapter.kvstore.set(key=key, value=json.dumps(dummy.model_dump()))
@@ -81,10 +81,10 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
 async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.initialize()
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier="foo_db", provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )
-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     await vector_io_adapter.shutdown()
     await vector_io_adapter.initialize()
@@ -92,15 +92,15 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.shutdown()

-async def test_register_and_unregister_vector_db(vector_io_adapter):
+async def test_register_and_unregister_vector_store(vector_io_adapter):
     unique_id = f"foo_db_{np.random.randint(1e6)}"
-    dummy = VectorDB(
+    dummy = VectorStore(
         identifier=unique_id, provider_id="test_provider", embedding_model="test_model", embedding_dimension=128
     )

-    await vector_io_adapter.register_vector_db(dummy)
+    await vector_io_adapter.register_vector_store(dummy)
     assert dummy.identifier in vector_io_adapter.cache
-    await vector_io_adapter.unregister_vector_db(dummy.identifier)
+    await vector_io_adapter.unregister_vector_store(dummy.identifier)
     assert dummy.identifier not in vector_io_adapter.cache
@@ -121,7 +121,7 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
 async def test_insert_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
     with pytest.raises(ValueError):
         await vector_io_adapter.insert_chunks("db_not_exist", [])
@@ -170,7 +170,7 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
 async def test_query_chunks_missing_db_raises(vector_io_adapter):
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
+    vector_io_adapter._get_and_cache_vector_store_index = AsyncMock(return_value=None)
     with pytest.raises(ValueError):
         await vector_io_adapter.query_chunks("db_missing", "q", None)
@@ -182,7 +182,7 @@ async def test_save_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -198,7 +198,7 @@ async def test_update_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -214,7 +214,7 @@ async def test_delete_openai_vector_store(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -229,7 +229,7 @@ async def test_load_openai_vector_stores(vector_io_adapter):
         "id": store_id,
         "name": "Test Store",
         "description": "A test OpenAI vector store",
-        "vector_db_id": "test_db",
+        "vector_store_id": "test_db",
         "embedding_model": "test_model",
     }
@@ -998,8 +998,8 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
 async def test_embedding_config_from_metadata(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from metadata."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1015,9 +1015,9 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)

-    # Verify VectorDB was registered with correct embedding config from metadata
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from metadata
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
     assert call_args.embedding_model == "test-embedding-model"
     assert call_args.embedding_dimension == 512
@@ -1025,8 +1025,8 @@ async def test_embedding_config_from_metadata(vector_io_adapter):
 async def test_embedding_config_from_extra_body(vector_io_adapter):
     """Test that embedding configuration is correctly extracted from extra_body when metadata is empty."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1042,9 +1042,9 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)

-    # Verify VectorDB was registered with correct embedding config from extra_body
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    # Verify VectorStore was registered with correct embedding config from extra_body
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
     assert call_args.embedding_model == "extra-body-model"
     assert call_args.embedding_dimension == 1024
@@ -1052,8 +1052,8 @@ async def test_embedding_config_from_extra_body(vector_io_adapter):
 async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     """Test that consistent embedding config in both metadata and extra_body passes validation."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1073,8 +1073,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)

     # Should not raise any error and use metadata config
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
     assert call_args.embedding_model == "consistent-model"
     assert call_args.embedding_dimension == 768
@@ -1082,8 +1082,8 @@ async def test_embedding_config_consistency_check_passes(vector_io_adapter):
 async def test_embedding_config_inconsistency_errors(vector_io_adapter):
     """Test that inconsistent embedding config between metadata and extra_body raises errors."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1104,7 +1104,7 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
         await vector_io_adapter.openai_create_vector_store(params)

     # Reset mock for second test
-    vector_io_adapter.register_vector_db.reset_mock()
+    vector_io_adapter.register_vector_store.reset_mock()

     # Test with inconsistent embedding dimension
     params = OpenAICreateVectorStoreRequestWithExtraBody(
@@ -1126,8 +1126,8 @@ async def test_embedding_config_inconsistency_errors(vector_io_adapter):
 async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     """Test that embedding dimension defaults to 768 when not provided."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
@@ -1143,8 +1143,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
     await vector_io_adapter.openai_create_vector_store(params)

     # Should default to 768 dimensions
-    vector_io_adapter.register_vector_db.assert_called_once()
-    call_args = vector_io_adapter.register_vector_db.call_args[0][0]
+    vector_io_adapter.register_vector_store.assert_called_once()
+    call_args = vector_io_adapter.register_vector_store.call_args[0][0]
     assert call_args.embedding_model == "model-without-dimension"
     assert call_args.embedding_dimension == 768
@@ -1152,8 +1152,8 @@ async def test_embedding_config_defaults_when_missing(vector_io_adapter):
 async def test_embedding_config_required_model_missing(vector_io_adapter):
     """Test that missing embedding model raises error."""
-    # Mock register_vector_db to avoid actual registration
-    vector_io_adapter.register_vector_db = AsyncMock()
+    # Mock register_vector_store to avoid actual registration
+    vector_io_adapter.register_vector_store = AsyncMock()

     # Set provider_id attribute for the adapter
     vector_io_adapter.__provider_id__ = "test_provider"
     # Mock the default model lookup to return None (no default model available)
View file
@@ -18,7 +18,7 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti

 class TestRagQuery:
-    async def test_query_raises_on_empty_vector_db_ids(self):
+    async def test_query_raises_on_empty_vector_store_ids(self):
         rag_tool = MemoryToolRuntimeImpl(
             config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), files_api=MagicMock()
         )
@@ -82,7 +82,7 @@ class TestRagQuery:
         with pytest.raises(ValueError):
             RAGQueryConfig(mode="wrong_mode")

-    async def test_query_adds_vector_db_id_to_chunk_metadata(self):
+    async def test_query_adds_vector_store_id_to_chunk_metadata(self):
         rag_tool = MemoryToolRuntimeImpl(
             config=MagicMock(),
             vector_io_api=MagicMock(),
View file
@@ -21,7 +21,7 @@ from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
     URL,
-    VectorDBWithIndex,
+    VectorStoreWithIndex,
     _validate_embedding,
     content_from_doc,
     make_overlapped_chunks,
@@ -206,15 +206,15 @@ class TestVectorStore:
         assert str(excinfo.value.__cause__) == "Cannot convert to string"

-class TestVectorDBWithIndex:
+class TestVectorStoreWithIndex:
     async def test_insert_chunks_without_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model without embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model without embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()

-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )

         chunks = [
@@ -227,7 +227,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
         ]

-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)

         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
@@ -243,14 +243,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))

     async def test_insert_chunks_with_valid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()

-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )

         chunks = [
@@ -258,7 +258,7 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=[0.4, 0.5, 0.6], metadata={}),
         ]

-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)

         mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_called_once()
@@ -267,14 +267,14 @@ class TestVectorDBWithIndex:
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))

     async def test_insert_chunks_with_invalid_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_dimension = 3
-        mock_vector_db.embedding_model = "test-model with invalid embeddings"
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_dimension = 3
+        mock_vector_store.embedding_model = "test-model with invalid embeddings"
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()

-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )

         # Verify Chunk raises ValueError for invalid embedding type
@@ -283,7 +283,7 @@ class TestVectorDBWithIndex:
         # Verify Chunk raises ValueError for invalid embedding type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match="Input should be a valid list"):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 [
                     Chunk(content="Test 1", embedding=None, metadata={}),
                     Chunk(content="Test 2", embedding="invalid_type", metadata={}),
@@ -292,7 +292,7 @@ class TestVectorDBWithIndex:
         # Verify Chunk raises ValueError for invalid embedding element type in insert_chunks (i.e., Chunk errors before insert_chunks is called)
         with pytest.raises(ValueError, match=" Input should be a valid number, unable to parse string as a number "):
-            await vector_db_with_index.insert_chunks(
+            await vector_store_with_index.insert_chunks(
                 Chunk(content="Test 1", embedding=[0.1, "string", 0.3], metadata={})
             )
@@ -300,20 +300,20 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={}),
         ]
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
-            await vector_db_with_index.insert_chunks(chunks_wrong_dim)
+            await vector_store_with_index.insert_chunks(chunks_wrong_dim)

         mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_not_called()

     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
-        mock_vector_db = MagicMock()
-        mock_vector_db.embedding_model = "test-model with partial embeddings"
-        mock_vector_db.embedding_dimension = 3
+        mock_vector_store = MagicMock()
+        mock_vector_store.embedding_model = "test-model with partial embeddings"
+        mock_vector_store.embedding_dimension = 3
         mock_index = AsyncMock()
         mock_inference_api = AsyncMock()

-        vector_db_with_index = VectorDBWithIndex(
-            vector_db=mock_vector_db, index=mock_index, inference_api=mock_inference_api
+        vector_store_with_index = VectorStoreWithIndex(
+            vector_store=mock_vector_store, index=mock_index, inference_api=mock_inference_api
         )

         chunks = [
@@ -327,7 +327,7 @@ class TestVectorDBWithIndex:
             OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
         ]

-        await vector_db_with_index.insert_chunks(chunks)
+        await vector_store_with_index.insert_chunks(chunks)

         # Verify openai_embeddings was called with correct params
         mock_inference_api.openai_embeddings.assert_called_once()
View file
@@ -8,8 +8,8 @@
 import pytest

 from llama_stack.apis.inference import Model
-from llama_stack.apis.vector_dbs import VectorDB
-from llama_stack.core.datatypes import VectorDBWithOwner
+from llama_stack.apis.vector_stores import VectorStore
+from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
     KEY_FORMAT,
@@ -20,12 +20,12 @@ from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_b
 @pytest.fixture
-def sample_vector_db():
-    return VectorDB(
-        identifier="test_vector_db",
+def sample_vector_store():
+    return VectorStore(
+        identifier="test_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )
@@ -45,17 +45,17 @@ async def test_registry_initialization(disk_dist_registry):
     assert result is None


-async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model):
-    print(f"Registering {sample_vector_db}")
-    await disk_dist_registry.register(sample_vector_db)
+async def test_basic_registration(disk_dist_registry, sample_vector_store, sample_model):
+    print(f"Registering {sample_vector_store}")
+    await disk_dist_registry.register(sample_vector_store)
     print(f"Registering {sample_model}")
     await disk_dist_registry.register(sample_model)
-    print("Getting vector_db")
-    result_vector_db = await disk_dist_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    print("Getting vector_store")
+    result_vector_store = await disk_dist_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.provider_id == sample_vector_store.provider_id

     result_model = await disk_dist_registry.get("model", "test_model")
     assert result_model is not None
@@ -63,11 +63,11 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m
     assert result_model.provider_id == sample_model.provider_id


-async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model):
+async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_store, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(sqlite_kvstore)
     await disk_registry.initialize()
-    await disk_registry.register(sample_vector_db)
+    await disk_registry.register(sample_vector_store)
     await disk_registry.register(sample_model)

     # Test cached version loads from disk
@@ -79,29 +79,29 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
     )
     await cached_registry.initialize()

-    result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == sample_vector_db.identifier
-    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
-    assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
-    assert result_vector_db.provider_id == sample_vector_db.provider_id
+    result_vector_store = await cached_registry.get("vector_store", "test_vector_store")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == sample_vector_store.identifier
+    assert result_vector_store.embedding_model == sample_vector_store.embedding_model
+    assert result_vector_store.embedding_dimension == sample_vector_store.embedding_dimension
+    assert result_vector_store.provider_id == sample_vector_store.provider_id


 async def test_cached_registry_updates(cached_disk_dist_registry):
-    new_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    new_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    await cached_disk_dist_registry.register(new_vector_db)
+    await cached_disk_dist_registry.register(new_vector_store)

     # Verify in cache
-    result_vector_db = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id

     # Verify persisted to disk
     db_path = cached_disk_dist_registry.kvstore.db_path
@@ -111,87 +111,89 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
         await kvstore_impl(KVStoreReference(backend=backend_name, namespace="registry"))
     )
     await new_registry.initialize()

-    result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
-    assert result_vector_db is not None
-    assert result_vector_db.identifier == new_vector_db.identifier
-    assert result_vector_db.provider_id == new_vector_db.provider_id
+    result_vector_store = await new_registry.get("vector_store", "test_vector_store_2")
+    assert result_vector_store is not None
+    assert result_vector_store.identifier == new_vector_store.identifier
+    assert result_vector_store.provider_id == new_vector_store.provider_id


 async def test_duplicate_provider_registration(cached_disk_dist_registry):
-    original_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    original_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",
     )
-    assert await cached_disk_dist_registry.register(original_vector_db)
+    assert await cached_disk_dist_registry.register(original_vector_store)

-    duplicate_vector_db = VectorDB(
-        identifier="test_vector_db_2",
+    duplicate_vector_store = VectorStore(
+        identifier="test_vector_store_2",
         embedding_model="different-model",
         embedding_dimension=768,
-        provider_resource_id="test_vector_db_2",
+        provider_resource_id="test_vector_store_2",
         provider_id="baz",  # Same provider_id
     )

-    with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db_2' already exists"):
-        await cached_disk_dist_registry.register(duplicate_vector_db)
+    with pytest.raises(
+        ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store_2' already exists"
+    ):
+        await cached_disk_dist_registry.register(duplicate_vector_store)

-    result = await cached_disk_dist_registry.get("vector_db", "test_vector_db_2")
+    result = await cached_disk_dist_registry.get("vector_store", "test_vector_store_2")
     assert result is not None
-    assert result.embedding_model == original_vector_db.embedding_model  # Original values preserved
+    assert result.embedding_model == original_vector_store.embedding_model  # Original values preserved


 async def test_get_all_objects(cached_disk_dist_registry):
     # Create multiple test banks
-    test_vector_dbs = [
-        VectorDB(
-            identifier=f"test_vector_db_{i}",
+    test_vector_stores = [
+        VectorStore(
+            identifier=f"test_vector_store_{i}",
             embedding_model="nomic-embed-text-v1.5",
             embedding_dimension=768,
-            provider_resource_id=f"test_vector_db_{i}",
+            provider_resource_id=f"test_vector_store_{i}",
             provider_id=f"provider_{i}",
         )
         for i in range(3)
     ]

-    # Register all vector_dbs
-    for vector_db in test_vector_dbs:
-        await cached_disk_dist_registry.register(vector_db)
+    # Register all vector_stores
+    for vector_store in test_vector_stores:
+        await cached_disk_dist_registry.register(vector_store)

     # Test get_all retrieval
     all_results = await cached_disk_dist_registry.get_all()
     assert len(all_results) == 3

-    # Verify each vector_db was stored correctly
-    for original_vector_db in test_vector_dbs:
-        matching_vector_dbs = [v for v in all_results if v.identifier == original_vector_db.identifier]
-        assert len(matching_vector_dbs) == 1
-        stored_vector_db = matching_vector_dbs[0]
-        assert stored_vector_db.embedding_model == original_vector_db.embedding_model
-        assert stored_vector_db.provider_id == original_vector_db.provider_id
-        assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension
+    # Verify each vector_store was stored correctly
+    for original_vector_store in test_vector_stores:
+        matching_vector_stores = [v for v in all_results if v.identifier == original_vector_store.identifier]
+        assert len(matching_vector_stores) == 1
+        stored_vector_store = matching_vector_stores[0]
+        assert stored_vector_store.embedding_model == original_vector_store.embedding_model
+        assert stored_vector_store.provider_id == original_vector_store.provider_id
+        assert stored_vector_store.embedding_dimension == original_vector_store.embedding_dimension


 async def test_parse_registry_values_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
-        identifier="valid_vector_db",
+    valid_db = VectorStore(
+        identifier="valid_vector_store",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
-        provider_resource_id="valid_vector_db",
+        provider_resource_id="valid_vector_store",
         provider_id="test-provider",
     )

     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_vector_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_vector_store"), valid_db.model_dump_json()
     )

-    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_db", identifier="corrupted_json"), "{not valid json")
+    await sqlite_kvstore.set(KEY_FORMAT.format(type="vector_store", identifier="corrupted_json"), "{not valid json")

     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="missing_fields"),
-        '{"type": "vector_db", "identifier": "missing_fields"}',
+        KEY_FORMAT.format(type="vector_store", identifier="missing_fields"),
+        '{"type": "vector_store", "identifier": "missing_fields"}',
     )

     test_registry = DiskDistributionRegistry(sqlite_kvstore)
@@ -202,18 +204,18 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore):
     # Should have filtered out the invalid entries
     assert len(all_objects) == 1
-    assert all_objects[0].identifier == "valid_vector_db"
+    assert all_objects[0].identifier == "valid_vector_store"

     # Check that the get method also handles errors correctly
-    invalid_obj = await test_registry.get("vector_db", "corrupted_json")
+    invalid_obj = await test_registry.get("vector_store", "corrupted_json")
     assert invalid_obj is None

-    invalid_obj = await test_registry.get("vector_db", "missing_fields")
+    invalid_obj = await test_registry.get("vector_store", "missing_fields")
     assert invalid_obj is None


 async def test_cached_registry_error_handling(sqlite_kvstore):
-    valid_db = VectorDB(
+    valid_db = VectorStore(
         identifier="valid_cached_db",
         embedding_model="nomic-embed-text-v1.5",
         embedding_dimension=768,
@@ -222,12 +224,12 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     )

     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="valid_cached_db"), valid_db.model_dump_json()
+        KEY_FORMAT.format(type="vector_store", identifier="valid_cached_db"), valid_db.model_dump_json()
     )

     await sqlite_kvstore.set(
-        KEY_FORMAT.format(type="vector_db", identifier="invalid_cached_db"),
-        '{"type": "vector_db", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
+        KEY_FORMAT.format(type="vector_store", identifier="invalid_cached_db"),
+        '{"type": "vector_store", "identifier": "invalid_cached_db", "embedding_model": 12345}',  # Should be string
     )

     cached_registry = CachedDiskDistributionRegistry(sqlite_kvstore)
@@ -237,63 +239,65 @@ async def test_cached_registry_error_handling(sqlite_kvstore):
     assert len(all_objects) == 1
     assert all_objects[0].identifier == "valid_cached_db"

-    invalid_obj = await cached_registry.get("vector_db", "invalid_cached_db")
+    invalid_obj = await cached_registry.get("vector_store", "invalid_cached_db")
     assert invalid_obj is None


 async def test_double_registration_identical_objects(disk_dist_registry):
     """Test that registering identical objects succeeds (idempotent)."""
-    vector_db = VectorDBWithOwner(
-        identifier="test_vector_db",
+    vector_store = VectorStoreWithOwner(
+        identifier="test_vector_store",
         embedding_model="all-MiniLM-L6-v2",
         embedding_dimension=384,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )

     # First registration should succeed
-    result1 = await disk_dist_registry.register(vector_db)
+    result1 = await disk_dist_registry.register(vector_store)
     assert result1 is True

     # Second registration of identical object should also succeed (idempotent)
-    result2 = await disk_dist_registry.register(vector_db)
+    result2 = await disk_dist_registry.register(vector_store)
     assert result2 is True

     # Verify object exists and is unchanged
-    retrieved = await disk_dist_registry.get("vector_db", "test_vector_db")
+    retrieved = await disk_dist_registry.get("vector_store", "test_vector_store")
     assert retrieved is not None
-    assert retrieved.identifier == vector_db.identifier
-    assert retrieved.embedding_model == vector_db.embedding_model
+    assert retrieved.identifier == vector_store.identifier
+    assert retrieved.embedding_model == vector_store.embedding_model


 async def test_double_registration_different_objects(disk_dist_registry):
     """Test that registering different objects with same identifier fails."""
-    vector_db1 = VectorDBWithOwner(
-        identifier="test_vector_db",
+    vector_store1 = VectorStoreWithOwner(
+        identifier="test_vector_store",
         embedding_model="all-MiniLM-L6-v2",
         embedding_dimension=384,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )

-    vector_db2 = VectorDBWithOwner(
-        identifier="test_vector_db",  # Same identifier
+    vector_store2 = VectorStoreWithOwner(
+        identifier="test_vector_store",  # Same identifier
         embedding_model="different-model",  # Different embedding model
         embedding_dimension=384,
-        provider_resource_id="test_vector_db",
+        provider_resource_id="test_vector_store",
         provider_id="test-provider",
     )

     # First registration should succeed
-    result1 = await disk_dist_registry.register(vector_db1)
+    result1 = await disk_dist_registry.register(vector_store1)
     assert result1 is True

     # Second registration with different data should fail
-    with pytest.raises(ValueError, match="Object of type 'vector_db' and identifier 'test_vector_db' already exists"):
-        await disk_dist_registry.register(vector_db2)
+    with pytest.raises(
+        ValueError, match="Object of type 'vector_store' and identifier 'test_vector_store' already exists"
+    ):
+        await disk_dist_registry.register(vector_store2)

     # Verify original object is unchanged
-    retrieved = await disk_dist_registry.get("vector_db", "test_vector_db")
+    retrieved = await disk_dist_registry.get("vector_store", "test_vector_store")
     assert retrieved is not None
     assert retrieved.embedding_model == "all-MiniLM-L6-v2"  # Original value

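Taken together, the registry tests above pin down a simple contract: re-registering an identical object is an idempotent success that returns True, while registering a different object under an existing (type, identifier) key raises a ValueError and leaves the original untouched. A toy in-memory sketch of that contract (not the real DiskDistributionRegistry, whose async kvstore persistence is omitted here):

```python
# Toy model of the registration contract verified above; the real registry
# is async and persists to a kvstore, both omitted for brevity.
class ToyRegistry:
    def __init__(self) -> None:
        self._objects: dict[tuple[str, str], dict] = {}

    def register(self, obj: dict) -> bool:
        key = (obj["type"], obj["identifier"])
        existing = self._objects.get(key)
        if existing is not None and existing != obj:
            raise ValueError(
                f"Object of type '{key[0]}' and identifier '{key[1]}' already exists"
            )
        self._objects[key] = obj  # no-op when an identical object is re-registered
        return True


registry = ToyRegistry()
store = {"type": "vector_store", "identifier": "test_vector_store", "embedding_model": "all-MiniLM-L6-v2"}
assert registry.register(store)        # first registration succeeds
assert registry.register(dict(store))  # identical re-registration is idempotent
try:
    registry.register({**store, "embedding_model": "different-model"})
    raise AssertionError("expected ValueError")
except ValueError as exc:
    assert "already exists" in str(exc)  # conflicting payload is rejected
```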
View file

@@ -41,7 +41,7 @@ class TestTranslateException:
                 self.identifier = identifier
                 self.owner = owner

-        resource = MockResource("vector_db", "test-db")
+        resource = MockResource("vector_store", "test-db")
         exc = AccessDeniedError("create", resource, user)
         result = translate_exception(exc)

@@ -49,7 +49,7 @@ class TestTranslateException:
         assert isinstance(result, HTTPException)
         assert result.status_code == 403
         assert "test-user" in result.detail
-        assert "vector_db::test-db" in result.detail
+        assert "vector_store::test-db" in result.detail
         assert "create" in result.detail
         assert "roles=['user']" in result.detail
         assert "teams=['dev']" in result.detail