flesh out memory banks API

Ashwin Bharambe 2024-08-23 06:38:15 -07:00
parent 31289e3f47
commit 77d6055d9f
11 changed files with 1792 additions and 974 deletions

View file

@@ -10,13 +10,13 @@ from typing import Any, Dict, List, Literal, Optional, Union
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, ConfigDict, Field, validator
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Annotated
from llama_toolchain.common.deployment_types import * # noqa: F403
from llama_toolchain.inference.api import * # noqa: F403
from llama_toolchain.safety.api.datatypes import * # noqa: F403
from llama_toolchain.memory.api.datatypes import * # noqa: F403
from llama_toolchain.safety.api import * # noqa: F403
from llama_toolchain.memory.api import * # noqa: F403
@json_schema_type
@@ -25,31 +25,81 @@ class Attachment(BaseModel):
mime_type: str
class AgenticSystemBuiltinTool(BuiltinTool):
class AgenticSystemTool(Enum):
brave_search = "brave_search"
wolfram_alpha = "wolfram_alpha"
photogen = "photogen"
code_interpreter = "code_interpreter"
function_call = "function_call"
memory = "memory"
@json_schema_type
class AgenticSystemToolDefinition(BaseModel):
tool_name: Union[AgenticSystemBuiltinTool, str]
description: Optional[str] = None
parameters: Optional[Dict[str, ToolParamDefinition]] = None
@validator("tool_name", pre=True)
@classmethod
def validate_field(cls, v):
if isinstance(v, str):
try:
return AgenticSystemBuiltinTool(v)
except ValueError:
return v
return v
execution_config: Optional[RestAPIExecutionConfig] = None
class ToolDefinitionCommon(BaseModel):
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
@json_schema_type
class BraveSearchToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.brave_search.value] = (
AgenticSystemTool.brave_search.value
)
remote_execution: Optional[RestAPIExecutionConfig] = None
@json_schema_type
class WolframAlphaToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.wolfram_alpha.value] = (
AgenticSystemTool.wolfram_alpha.value
)
remote_execution: Optional[RestAPIExecutionConfig] = None
@json_schema_type
class PhotogenToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.photogen.value] = AgenticSystemTool.photogen.value
remote_execution: Optional[RestAPIExecutionConfig] = None
@json_schema_type
class CodeInterpreterToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.code_interpreter.value] = (
AgenticSystemTool.code_interpreter.value
)
enable_inline_code_execution: bool = True
remote_execution: Optional[RestAPIExecutionConfig] = None
@json_schema_type
class FunctionCallToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.function_call.value] = (
AgenticSystemTool.function_call.value
)
description: str
parameters: Dict[str, ToolParamDefinition]
remote_execution: Optional[RestAPIExecutionConfig] = None
@json_schema_type
class MemoryToolDefinition(ToolDefinitionCommon):
type: Literal[AgenticSystemTool.memory.value] = AgenticSystemTool.memory.value
memory_banks: List[MemoryBank] = Field(default_factory=list)
AgenticSystemToolDefinition = Annotated[
Union[
BraveSearchToolDefinition,
WolframAlphaToolDefinition,
PhotogenToolDefinition,
CodeInterpreterToolDefinition,
FunctionCallToolDefinition,
MemoryToolDefinition,
],
Field(discriminator="type"),
]
class StepCommon(BaseModel):
turn_id: str
step_id: str
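
A minimal sketch of constructing the new discriminated tool definitions, assuming they are re-exported from llama_toolchain.agentic_system.api alongside AgentConfig; shield and remote_execution fields are left at their defaults:

from llama_toolchain.agentic_system.api import (
    BraveSearchToolDefinition,
    CodeInterpreterToolDefinition,
)

# Each definition carries a literal "type" discriminator, so a heterogeneous
# list round-trips cleanly through the AgenticSystemToolDefinition union.
tools = [
    BraveSearchToolDefinition(),  # type == "brave_search"
    CodeInterpreterToolDefinition(enable_inline_code_execution=False),
]
for tool in tools:
    print(tool.type)
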
@@ -136,27 +186,45 @@ class Session(BaseModel):
started_at: datetime
@json_schema_type
class MemoryConfig(BaseModel):
memory_bank_id: str
class MemoryBankConfigCommon(BaseModel):
bank_id: str
# this configuration can hold other information we may want to configure
# how will the agent use the memory bank API?
#
#
class VectorMemoryBankConfig(MemoryBankConfigCommon):
type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
class KeyValueMemoryBankConfig(MemoryBankConfigCommon):
type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
keys: List[str] # what keys to focus on
class KeywordMemoryBankConfig(MemoryBankConfigCommon):
type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
class GraphMemoryBankConfig(MemoryBankConfigCommon):
type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
entities: List[str] # what entities to focus on
MemoryBankConfig = Annotated[
Union[
VectorMemoryBankConfig,
KeyValueMemoryBankConfig,
KeywordMemoryBankConfig,
GraphMemoryBankConfig,
],
Field(discriminator="type"),
]
class AgentConfigCommon(BaseModel):
sampling_params: Optional[SamplingParams] = SamplingParams()
memory_configs: Optional[List[MemoryConfig]] = Field(default_factory=list)
input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list)
# if you completely want to replace the messages prefixed by the system,
# this is debug only
debug_prefix_messages: Optional[List[Message]] = Field(default_factory=list)
tools: Optional[List[AgenticSystemToolDefinition]] = Field(default_factory=list)
tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto)
tool_prompt_format: Optional[ToolPromptFormat] = Field(
@@ -168,6 +236,7 @@ class AgentConfigCommon(BaseModel):
class AgentConfig(AgentConfigCommon):
model: str
instructions: str
memory_bank_configs: Optional[List[MemoryBankConfig]] = Field(default_factory=list)
class AgentConfigOverridablePerTurn(AgentConfigCommon):
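
A rough sketch of an AgentConfig wired to the new memory_bank_configs field; the bank id is a placeholder for a bank created through the memory API, and the imports assume these datatypes are re-exported from the api package:

from llama_toolchain.agentic_system.api import (
    AgentConfig,
    BraveSearchToolDefinition,
    VectorMemoryBankConfig,
)

agent_config = AgentConfig(
    model="Meta-Llama3.1-8B-Instruct",
    instructions="You are a helpful assistant",
    tools=[BraveSearchToolDefinition()],
    # Attach a pre-existing bank by id; "bank-1234" is a placeholder.
    memory_bank_configs=[VectorMemoryBankConfig(bank_id="bank-1234")],
)
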

View file

@@ -10,21 +10,9 @@ from typing import Protocol
from llama_models.schema_utils import json_schema_type, webmethod
@json_schema_type
class AgenticSystemCreateRequest(BaseModel):
agent_config: AgentConfig
@json_schema_type
class AgenticSystemCreateResponse(BaseModel):
# TODO: rename this to agent_id
system_id: str
@json_schema_type
class AgenticSystemSessionCreateRequest(BaseModel):
system_id: str
session_name: str
agent_id: str
@json_schema_type
@@ -33,8 +21,8 @@ class AgenticSystemSessionCreateResponse(BaseModel):
@json_schema_type
class AgenticSystemTurnCreateRequest(BaseModel, AgentConfigOverridablePerTurn):
system_id: str
class AgenticSystemTurnCreateRequest(AgentConfigOverridablePerTurn):
agent_id: str
session_id: str
# TODO: figure out how we can simplify this and make why
@@ -67,7 +55,7 @@ class AgenticSystem(Protocol):
@webmethod(route="/agentic_system/create")
async def create_agentic_system(
self,
request: AgenticSystemCreateRequest,
agent_config: AgentConfig,
) -> AgenticSystemCreateResponse: ...
@webmethod(route="/agentic_system/turn/create")
@@ -91,7 +79,8 @@ class AgenticSystem(Protocol):
@webmethod(route="/agentic_system/session/create")
async def create_agentic_system_session(
self,
request: AgenticSystemSessionCreateRequest,
agent_id: str,
session_name: str,
) -> AgenticSystemSessionCreateResponse: ...
@webmethod(route="/agentic_system/session/get")

View file

@@ -26,9 +26,7 @@ from llama_toolchain.agentic_system.event_logger import EventLogger
from .api import (
AgentConfig,
AgenticSystem,
AgenticSystemCreateRequest,
AgenticSystemCreateResponse,
AgenticSystemSessionCreateRequest,
AgenticSystemSessionCreateResponse,
AgenticSystemToolDefinition,
AgenticSystemTurnCreateRequest,
@@ -127,28 +125,24 @@ async def run_main(host: str, port: int):
),
]
create_request = AgenticSystemCreateRequest(
agent_config = AgentConfig(
model="Meta-Llama3.1-8B-Instruct",
agent_config=AgentConfig(
instructions="You are a helpful assistant",
sampling_params=SamplingParams(),
available_tools=tool_definitions,
tools=tool_definitions,
input_shields=[],
output_shields=[],
debug_prefix_messages=[],
tool_prompt_format=ToolPromptFormat.json,
),
)
create_response = await api.create_agentic_system(create_request)
create_response = await api.create_agentic_system(agent_config)
print(create_response)
session_response = await api.create_agentic_system_session(
AgenticSystemSessionCreateRequest(
system_id=create_response.system_id,
agent_id=create_response.agent_id,
session_name="test_session",
)
)
print(session_response)
user_prompts = [
@@ -162,7 +156,7 @@ async def run_main(host: str, port: int):
cprint(f"User> {content}", color="blue")
iterator = api.create_agentic_system_turn(
AgenticSystemTurnCreateRequest(
system_id=create_response.system_id,
agent_id=create_response.agent_id,
session_id=session_response.session_id,
messages=[
UserMessage(content=content),
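
A sketch of consuming the streamed turn, assuming UserMessage comes from llama_models.llama3.api.datatypes as elsewhere in the toolchain; EventLogger (imported above) can pretty-print the events, but plain iteration also works:

from llama_models.llama3.api.datatypes import UserMessage
from llama_toolchain.agentic_system.api import AgenticSystemTurnCreateRequest

async def run_turn(api, agent_id: str, session_id: str, content: str) -> None:
    # The turn endpoint returns an async generator of events.
    iterator = api.create_agentic_system_turn(
        AgenticSystemTurnCreateRequest(
            agent_id=agent_id,
            session_id=session_id,
            messages=[UserMessage(content=content)],
            stream=True,
        )
    )
    async for chunk in iterator:
        print(chunk)
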

View file

@@ -8,7 +8,7 @@
import copy
import uuid
from datetime import datetime
from typing import AsyncGenerator, List, Optional
from typing import AsyncGenerator, List
from llama_models.llama3.api.datatypes import ToolPromptFormat
@@ -326,7 +326,7 @@ class ChatAgent(ShieldRunnerMixin):
req = ChatCompletionRequest(
model=self.agent_config.model,
messages=input_messages,
tools=self.agent_config.available_tools,
tools=self.agent_config.tools,
tool_prompt_format=self.agent_config.tool_prompt_format,
stream=True,
sampling_params=sampling_params,

View file

@@ -24,7 +24,7 @@ from llama_toolchain.tools.builtin import (
)
from llama_toolchain.tools.safety import with_safety
from .agent_instance import AgentInstance, ChatAgent
from .agent_instance import ChatAgent
from .config import MetaReferenceImplConfig
@@ -71,11 +71,11 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
self,
request: AgenticSystemCreateRequest,
) -> AgenticSystemCreateResponse:
system_id = str(uuid.uuid4())
agent_id = str(uuid.uuid4())
builtin_tools = []
cfg = request.agent_config
for dfn in cfg.available_tools:
for dfn in cfg.tools:
if isinstance(dfn.tool_name, BuiltinTool):
if dfn.tool_name == BuiltinTool.wolfram_alpha:
key = self.config.wolfram_api_key
@@ -102,7 +102,7 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
)
)
AGENT_INSTANCES_BY_ID[system_id] = ChatAgent(
AGENT_INSTANCES_BY_ID[agent_id] = ChatAgent(
agent_config=cfg,
inference_api=self.inference_api,
safety_api=self.safety_api,
@@ -111,16 +111,16 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
)
return AgenticSystemCreateResponse(
system_id=system_id,
agent_id=agent_id,
)
async def create_agentic_system_session(
self,
request: AgenticSystemSessionCreateRequest,
) -> AgenticSystemSessionCreateResponse:
system_id = request.system_id
assert system_id in AGENT_INSTANCES_BY_ID, f"System {system_id} not found"
agent = AGENT_INSTANCES_BY_ID[system_id]
agent_id = request.agent_id
assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
agent = AGENT_INSTANCES_BY_ID[agent_id]
session = agent.create_session(request.session_name)
return AgenticSystemSessionCreateResponse(
@@ -131,9 +131,9 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem):
self,
request: AgenticSystemTurnCreateRequest,
) -> AsyncGenerator:
system_id = request.system_id
assert system_id in AGENT_INSTANCES_BY_ID, f"System {system_id} not found"
agent = AGENT_INSTANCES_BY_ID[system_id]
agent_id = request.agent_id
assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found"
agent = AGENT_INSTANCES_BY_ID[agent_id]
assert (
request.session_id in agent.sessions

View file

@@ -19,7 +19,7 @@ from llama_toolchain.inference.api import Message
async def execute_with_custom_tools(
system: AgenticSystem,
system_id: str,
agent_id: str,
session_id: str,
messages: List[Message],
custom_tools: List[Any],
@@ -35,7 +35,7 @@ async def execute_with_custom_tools(
n_iter += 1
request = AgenticSystemTurnCreateRequest(
system_id=system_id,
agent_id=agent_id,
session_id=session_id,
messages=current_messages,
stream=stream,

View file

@@ -14,12 +14,7 @@ from llama_models.llama3.api.datatypes import (
ToolPromptFormat,
)
from llama_toolchain.agentic_system.api import (
AgentConfig,
AgenticSystemCreateRequest,
AgenticSystemSessionCreateRequest,
AgenticSystemToolDefinition,
)
from llama_toolchain.agentic_system.api import AgentConfig, AgenticSystemToolDefinition
from llama_toolchain.agentic_system.client import AgenticSystemClient
from llama_toolchain.agentic_system.meta_reference.execute_with_custom_tools import (
@@ -32,9 +27,9 @@ from llama_toolchain.safety.api.datatypes import BuiltinShield, ShieldDefinition
class AgenticSystemClientWrapper:
def __init__(self, api, system_id, custom_tools):
def __init__(self, api, agent_id, custom_tools):
self.api = api
self.system_id = system_id
self.agent_id = agent_id
self.custom_tools = custom_tools
self.session_id = None
@@ -43,18 +38,16 @@ class AgenticSystemClientWrapper:
name = f"Session-{uuid.uuid4()}"
response = await self.api.create_agentic_system_session(
AgenticSystemSessionCreateRequest(
system_id=self.system_id,
agent_id=self.agent_id,
session_name=name,
)
)
self.session_id = response.session_id
return self.session_id
async def run(self, messages: List[Message], stream: bool = True):
async for chunk in execute_with_custom_tools(
self.api,
self.system_id,
self.agent_id,
self.session_id,
messages,
self.custom_tools,
@@ -98,11 +91,10 @@ async def get_agent_system_instance(
ShieldDefinition(shield_type=BuiltinShield.injection_shield),
]
create_request = AgenticSystemCreateRequest(
agent_config = AgentConfig(
model=model,
agent_config=AgentConfig(
instructions="You are a helpful assistant",
available_tools=tool_definitions,
tools=tool_definitions,
input_shields=(
[]
if disable_safety
@@ -120,7 +112,6 @@ async def get_agent_system_instance(
),
sampling_params=SamplingParams(),
tool_prompt_format=tool_prompt_format,
),
)
create_response = await api.create_agentic_system(create_request)
return AgenticSystemClientWrapper(api, create_response.system_id, custom_tools)
create_response = await api.create_agentic_system(agent_config)
return AgenticSystemClientWrapper(api, create_response.agent_id, custom_tools)

View file

@@ -3,23 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel
@json_schema_type
class MemoryBank(BaseModel):
memory_bank_id: str
memory_bank_name: str
@json_schema_type
class MemoryBankDocument(BaseModel):
document_id: str
content: bytes
metadata: Dict[str, Any]
mime_type: str

View file

@@ -6,76 +6,132 @@
from typing import List, Protocol
from llama_models.llama3.api.datatypes import InterleavedTextMedia
from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_models.schema_utils import webmethod
from .datatypes import * # noqa: F403
@json_schema_type
class RetrieveMemoryDocumentsRequest(BaseModel):
query: InterleavedTextMedia
bank_ids: str
class MemoryBankDocument(BaseModel):
document_id: str
content: InterleavedTextMedia | URL
mime_type: str
metadata: Dict[str, Any]
class Chunk(BaseModel):
content: InterleavedTextMedia
token_count: int
@json_schema_type
class RetrieveMemoryDocumentsResponse(BaseModel):
documents: List[MemoryBankDocument]
class QueryDocumentsResponse(BaseModel):
chunks: List[Chunk]
scores: List[float]
@json_schema_type
class MemoryBankType(Enum):
vector = "vector"
keyvalue = "keyvalue"
keyword = "keyword"
graph = "graph"
class VectorMemoryBankConfig(BaseModel):
type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value
embedding_model: str
class KeyValueMemoryBankConfig(BaseModel):
type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value
class KeywordMemoryBankConfig(BaseModel):
type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value
class GraphMemoryBankConfig(BaseModel):
type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value
MemoryBankConfig = Annotated[
Union[
VectorMemoryBankConfig,
KeyValueMemoryBankConfig,
KeywordMemoryBankConfig,
GraphMemoryBankConfig,
],
Field(discriminator="type"),
]
@json_schema_type
class MemoryBank(BaseModel):
bank_id: str
name: str
config: MemoryBankConfig
# if there's a pre-existing store which obeys the MemoryBank REST interface
url: Optional[URL] = None
class Memory(Protocol):
@webmethod(route="/memory_banks/create")
def create_memory_bank(
self,
bank_id: str,
bank_name: str,
embedding_model: str,
documents: List[MemoryBankDocument],
) -> None: ...
name: str,
config: MemoryBankConfig,
url: Optional[URL] = None,
) -> MemoryBank: ...
@webmethod(route="/memory_banks/list")
def get_memory_banks(self) -> List[MemoryBank]: ...
@webmethod(route="/memory_banks/list", method="GET")
def list_memory_banks(self) -> List[MemoryBank]: ...
@webmethod(route="/memory_banks/get")
def get_memory_bank(self, bank_id: str) -> List[MemoryBank]: ...
def get_memory_bank(self, bank_id: str) -> MemoryBank: ...
@webmethod(route="/memory_banks/drop")
def delete_memory_bank(
@webmethod(route="/memory_banks/drop", method="DELETE")
def drop_memory_bank(
self,
bank_id: str,
) -> str: ...
@webmethod(route="/memory_bank/insert")
def insert_memory_documents(
def insert_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
) -> None: ...
@webmethod(route="/memory_bank/update")
def update_memory_documents(
def update_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
) -> None: ...
@webmethod(route="/memory_bank/get")
def retrieve_memory_documents(
self,
request: RetrieveMemoryDocumentsRequest,
) -> List[MemoryBankDocument]: ...
@webmethod(route="/memory_bank/get")
def get_memory_documents(
@webmethod(route="/memory_bank/query")
def query_documents(
self,
bank_id: str,
document_uuids: List[str],
) -> List[MemoryBankDocument]: ...
query: InterleavedTextMedia,
params: Optional[Dict[str, Any]] = None,
) -> QueryDocumentsResponse: ...
@webmethod(route="/memory_bank/delete")
def delete_memory_documents(
@webmethod(route="/memory_bank/documents/get")
def get_documents(
self,
bank_id: str,
document_uuids: List[str],
) -> List[str]: ...
document_ids: List[str],
) -> List[MemoryBankDocument]: ...
@webmethod(route="/memory_bank/documents/delete")
def delete_documents(
self,
bank_id: str,
document_ids: List[str],
) -> None: ...
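
A minimal end-to-end sketch of the reshaped Memory API above; the bank name, embedding model, and document contents are placeholders, and a concrete implementation may expose these methods as coroutines:

from llama_toolchain.memory.api import (
    Memory,
    MemoryBankDocument,
    VectorMemoryBankConfig,
)

def demo(memory: Memory) -> None:
    # Create a vector bank, add one document, then query it back as chunks.
    bank = memory.create_memory_bank(
        name="project-notes",
        config=VectorMemoryBankConfig(embedding_model="all-MiniLM-L6-v2"),  # placeholder model
    )
    memory.insert_documents(
        bank_id=bank.bank_id,
        documents=[
            MemoryBankDocument(
                document_id="doc-1",
                content="Memory banks hold retrievable context for agents.",
                mime_type="text/plain",
                metadata={},
            )
        ],
    )
    response = memory.query_documents(
        bank_id=bank.bank_id,
        query="what do memory banks hold?",
    )
    for chunk, score in zip(response.chunks, response.scores):
        print(score, chunk.content)
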

File diff suppressed because it is too large

File diff suppressed because it is too large