forked from phoenix-oss/llama-stack-mirror
[memory refactor][1/n] Rename Memory -> VectorIO, MemoryBanks -> VectorDBs (#828)
See https://github.com/meta-llama/llama-stack/issues/827 for the broader design. This is the first part:
- Delete the other kinds of memory banks (keyvalue, keyword, graph) for now; we will introduce a key-value store API as part of this design, but it will not be used in the RAG tool yet.
- Rename the APIs (Memory -> VectorIO, MemoryBanks -> VectorDBs).
This commit is contained in:
parent
35a00d004a
commit
3ae8585b65
37 changed files with 175 additions and 296 deletions
|
@ -33,7 +33,6 @@ from llama_stack.apis.inference import (
|
|||
ToolResponseMessage,
|
||||
UserMessage,
|
||||
)
|
||||
from llama_stack.apis.memory import MemoryBank
|
||||
from llama_stack.apis.safety import SafetyViolation
|
||||
from llama_stack.apis.tools import ToolDef
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
@ -133,8 +132,6 @@ class Session(BaseModel):
|
|||
turns: List[Turn]
|
||||
started_at: datetime
|
||||
|
||||
memory_bank: Optional[MemoryBank] = None
|
||||
|
||||
|
||||
class AgentToolGroupWithArgs(BaseModel):
|
||||
name: str
|
||||
|
|
|
@ -14,7 +14,7 @@ class Api(Enum):
|
|||
inference = "inference"
|
||||
safety = "safety"
|
||||
agents = "agents"
|
||||
memory = "memory"
|
||||
vector_io = "vector_io"
|
||||
datasetio = "datasetio"
|
||||
scoring = "scoring"
|
||||
eval = "eval"
|
||||
|
@ -25,7 +25,7 @@ class Api(Enum):
|
|||
|
||||
models = "models"
|
||||
shields = "shields"
|
||||
memory_banks = "memory_banks"
|
||||
vector_dbs = "vector_dbs"
|
||||
datasets = "datasets"
|
||||
scoring_functions = "scoring_functions"
|
||||
eval_tasks = "eval_tasks"
|
||||
|
|
|
@ -1,161 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
Annotated,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
Protocol,
|
||||
runtime_checkable,
|
||||
Union,
|
||||
)
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MemoryBankType(Enum):
|
||||
vector = "vector"
|
||||
keyvalue = "keyvalue"
|
||||
keyword = "keyword"
|
||||
graph = "graph"
|
||||
|
||||
|
||||
# define params for each type of memory bank, this leads to a tagged union
|
||||
# accepted as input from the API or from the config.
|
||||
@json_schema_type
|
||||
class VectorMemoryBankParams(BaseModel):
|
||||
memory_bank_type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
|
||||
embedding_model: str
|
||||
chunk_size_in_tokens: int
|
||||
overlap_size_in_tokens: Optional[int] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class KeyValueMemoryBankParams(BaseModel):
|
||||
memory_bank_type: Literal[MemoryBankType.keyvalue.value] = (
|
||||
MemoryBankType.keyvalue.value
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class KeywordMemoryBankParams(BaseModel):
|
||||
memory_bank_type: Literal[MemoryBankType.keyword.value] = (
|
||||
MemoryBankType.keyword.value
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class GraphMemoryBankParams(BaseModel):
|
||||
memory_bank_type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
|
||||
|
||||
|
||||
BankParams = Annotated[
|
||||
Union[
|
||||
VectorMemoryBankParams,
|
||||
KeyValueMemoryBankParams,
|
||||
KeywordMemoryBankParams,
|
||||
GraphMemoryBankParams,
|
||||
],
|
||||
Field(discriminator="memory_bank_type"),
|
||||
]
|
||||
|
||||
|
||||
# Some common functionality for memory banks.
|
||||
class MemoryBankResourceMixin(Resource):
|
||||
type: Literal[ResourceType.memory_bank.value] = ResourceType.memory_bank.value
|
||||
|
||||
@property
|
||||
def memory_bank_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_memory_bank_id(self) -> str:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorMemoryBank(MemoryBankResourceMixin):
|
||||
memory_bank_type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
|
||||
embedding_model: str
|
||||
chunk_size_in_tokens: int
|
||||
embedding_dimension: Optional[int] = 384 # default to minilm-l6-v2
|
||||
overlap_size_in_tokens: Optional[int] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class KeyValueMemoryBank(MemoryBankResourceMixin):
|
||||
memory_bank_type: Literal[MemoryBankType.keyvalue.value] = (
|
||||
MemoryBankType.keyvalue.value
|
||||
)
|
||||
|
||||
|
||||
# TODO: KeyValue and Keyword are so similar in name, oof. Get a better naming convention.
|
||||
@json_schema_type
|
||||
class KeywordMemoryBank(MemoryBankResourceMixin):
|
||||
memory_bank_type: Literal[MemoryBankType.keyword.value] = (
|
||||
MemoryBankType.keyword.value
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class GraphMemoryBank(MemoryBankResourceMixin):
|
||||
memory_bank_type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
|
||||
|
||||
|
||||
MemoryBank = register_schema(
|
||||
Annotated[
|
||||
Union[
|
||||
VectorMemoryBank,
|
||||
KeyValueMemoryBank,
|
||||
KeywordMemoryBank,
|
||||
GraphMemoryBank,
|
||||
],
|
||||
Field(discriminator="memory_bank_type"),
|
||||
],
|
||||
name="MemoryBank",
|
||||
)
|
||||
|
||||
|
||||
class MemoryBankInput(BaseModel):
|
||||
memory_bank_id: str
|
||||
params: BankParams
|
||||
provider_memory_bank_id: Optional[str] = None
|
||||
|
||||
|
||||
class ListMemoryBanksResponse(BaseModel):
|
||||
data: List[MemoryBank]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class MemoryBanks(Protocol):
|
||||
@webmethod(route="/memory-banks", method="GET")
|
||||
async def list_memory_banks(self) -> ListMemoryBanksResponse: ...
|
||||
|
||||
@webmethod(route="/memory-banks/{memory_bank_id}", method="GET")
|
||||
async def get_memory_bank(
|
||||
self,
|
||||
memory_bank_id: str,
|
||||
) -> Optional[MemoryBank]: ...
|
||||
|
||||
@webmethod(route="/memory-banks", method="POST")
|
||||
async def register_memory_bank(
|
||||
self,
|
||||
memory_bank_id: str,
|
||||
params: BankParams,
|
||||
provider_id: Optional[str] = None,
|
||||
provider_memory_bank_id: Optional[str] = None,
|
||||
) -> MemoryBank: ...
|
||||
|
||||
@webmethod(route="/memory-banks/{memory_bank_id}", method="DELETE")
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None: ...
|
|
@ -14,7 +14,7 @@ from pydantic import BaseModel, Field
|
|||
class ResourceType(Enum):
|
||||
model = "model"
|
||||
shield = "shield"
|
||||
memory_bank = "memory_bank"
|
||||
vector_db = "vector_db"
|
||||
dataset = "dataset"
|
||||
scoring_function = "scoring_function"
|
||||
eval_task = "eval_task"
|
||||
|
|
|
@ -4,4 +4,4 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .memory_banks import * # noqa: F401 F403
|
||||
from .vector_dbs import * # noqa: F401 F403
|
66
llama_stack/apis/vector_dbs/vector_dbs.py
Normal file
66
llama_stack/apis/vector_dbs/vector_dbs.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import List, Literal, Optional, Protocol, runtime_checkable
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class VectorDB(Resource):
|
||||
type: Literal[ResourceType.vector_db.value] = ResourceType.vector_db.value
|
||||
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
|
||||
@property
|
||||
def vector_db_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_vector_db_id(self) -> str:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class VectorDBInput(BaseModel):
|
||||
vector_db_id: str
|
||||
embedding_model: str
|
||||
embedding_dimension: int
|
||||
provider_vector_db_id: Optional[str] = None
|
||||
|
||||
|
||||
class ListVectorDBsResponse(BaseModel):
|
||||
data: List[VectorDB]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class VectorDBs(Protocol):
|
||||
@webmethod(route="/vector-dbs", method="GET")
|
||||
async def list_vector_dbs(self) -> ListVectorDBsResponse: ...
|
||||
|
||||
@webmethod(route="/vector-dbs/{vector_db_id}", method="GET")
|
||||
async def get_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
) -> Optional[VectorDB]: ...
|
||||
|
||||
@webmethod(route="/vector-dbs", method="POST")
|
||||
async def register_vector_db(
|
||||
self,
|
||||
vector_db_id: str,
|
||||
embedding_model: str,
|
||||
embedding_dimension: Optional[int] = 384,
|
||||
provider_id: Optional[str] = None,
|
||||
provider_vector_db_id: Optional[str] = None,
|
||||
) -> VectorDB: ...
|
||||
|
||||
@webmethod(route="/vector-dbs/{vector_db_id}", method="DELETE")
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None: ...
|
|
@ -4,4 +4,4 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .memory import * # noqa: F401 F403
|
||||
from .vector_io import * # noqa: F401 F403
|
|
@ -13,55 +13,45 @@ from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
|
|||
from llama_models.schema_utils import json_schema_type, webmethod
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.memory_banks import MemoryBank
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MemoryBankDocument(BaseModel):
|
||||
document_id: str
|
||||
content: InterleavedContent | URL
|
||||
mime_type: str | None = None
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class Chunk(BaseModel):
|
||||
content: InterleavedContent
|
||||
token_count: int
|
||||
document_id: str
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class QueryDocumentsResponse(BaseModel):
|
||||
class QueryChunksResponse(BaseModel):
|
||||
chunks: List[Chunk]
|
||||
scores: List[float]
|
||||
|
||||
|
||||
class MemoryBankStore(Protocol):
|
||||
def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
|
||||
class VectorDBStore(Protocol):
|
||||
def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Memory(Protocol):
|
||||
memory_bank_store: MemoryBankStore
|
||||
class VectorIO(Protocol):
|
||||
vector_db_store: VectorDBStore
|
||||
|
||||
# this will just block now until documents are inserted, but it should
|
||||
# probably return a Job instance which can be polled for completion
|
||||
@webmethod(route="/memory/insert", method="POST")
|
||||
async def insert_documents(
|
||||
@webmethod(route="/vector-io/insert", method="POST")
|
||||
async def insert_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
documents: List[MemoryBankDocument],
|
||||
vector_db_id: str,
|
||||
chunks: List[Chunk],
|
||||
ttl_seconds: Optional[int] = None,
|
||||
) -> None: ...
|
||||
|
||||
@webmethod(route="/memory/query", method="POST")
|
||||
async def query_documents(
|
||||
@webmethod(route="/vector-io/query", method="POST")
|
||||
async def query_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> QueryDocumentsResponse: ...
|
||||
) -> QueryChunksResponse: ...
|
Loading…
Add table
Add a link
Reference in a new issue