mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-05 12:21:52 +00:00
Agent persistence works
This commit is contained in:
parent
4eb0f30891
commit
59f1fe5af8
17 changed files with 136 additions and 90 deletions
|
@ -8,18 +8,14 @@ from typing import Dict
|
|||
|
||||
from llama_stack.distribution.datatypes import Api, ProviderSpec
|
||||
|
||||
from .config import MetaReferenceImplConfig
|
||||
from .config import MetaReferenceAgentsImplConfig
|
||||
|
||||
|
||||
async def get_provider_impl(
|
||||
config: MetaReferenceImplConfig, deps: Dict[Api, ProviderSpec]
|
||||
config: MetaReferenceAgentsImplConfig, deps: Dict[Api, ProviderSpec]
|
||||
):
|
||||
from .agents import MetaReferenceAgentsImpl
|
||||
|
||||
assert isinstance(
|
||||
config, MetaReferenceImplConfig
|
||||
), f"Unexpected config type: {type(config)}"
|
||||
|
||||
impl = MetaReferenceAgentsImpl(
|
||||
config,
|
||||
deps[Api.inference],
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
import asyncio
|
||||
import copy
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import shutil
|
||||
|
@ -25,7 +26,7 @@ from llama_stack.apis.inference import * # noqa: F403
|
|||
from llama_stack.apis.memory import * # noqa: F403
|
||||
from llama_stack.apis.safety import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.utils.api import KVStore
|
||||
from llama_stack.providers.utils.kvstore import KVStore
|
||||
|
||||
from .rag.context_retriever import generate_rag_query
|
||||
from .safety import SafetyException, ShieldRunnerMixin
|
||||
|
@ -46,6 +47,13 @@ def make_random_string(length: int = 8):
|
|||
)
|
||||
|
||||
|
||||
class AgentSessionInfo(BaseModel):
|
||||
session_id: str
|
||||
session_name: str
|
||||
memory_bank_id: Optional[str] = None
|
||||
started_at: datetime
|
||||
|
||||
|
||||
class ChatAgent(ShieldRunnerMixin):
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -136,21 +144,41 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
async def create_session(self, name: str) -> str:
|
||||
session_id = str(uuid.uuid4())
|
||||
session = dict(
|
||||
session_info = AgentSessionInfo(
|
||||
session_id=session_id,
|
||||
session_name=name,
|
||||
started_at=str(datetime.now()),
|
||||
started_at=datetime.now(),
|
||||
)
|
||||
await self.persistence_store.set(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
value=json.dumps(session),
|
||||
value=session_info.json(),
|
||||
)
|
||||
return session_id
|
||||
|
||||
async def _get_session_info(self, session_id: str) -> Optional[AgentSessionInfo]:
|
||||
value = await self.persistence_store.get(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
)
|
||||
if not value:
|
||||
return None
|
||||
|
||||
return AgentSessionInfo(**json.loads(value))
|
||||
|
||||
async def _add_memory_bank_to_session(self, session_id: str, bank_id: str):
|
||||
session_info = await self._get_session_info(session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {session_id} not found")
|
||||
|
||||
session_info.memory_bank_id = bank_id
|
||||
await self.persistence_store.set(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
value=session_info.json(),
|
||||
)
|
||||
|
||||
async def _add_turn_to_session(self, session_id: str, turn: Turn):
|
||||
await self.persistence_store.set(
|
||||
key=f"session:{self.agent_id}:{session_id}:{turn.turn_id}",
|
||||
value=json.dumps(turn.dict()),
|
||||
value=turn.json(),
|
||||
)
|
||||
|
||||
async def _get_session_turns(self, session_id: str) -> List[Turn]:
|
||||
|
@ -169,15 +197,6 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
return turns
|
||||
|
||||
async def _get_session_info(self, session_id: str) -> Optional[Dict[str, str]]:
|
||||
value = await self.persistence_store.get(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
)
|
||||
if not value:
|
||||
return None
|
||||
|
||||
return json.loads(value)
|
||||
|
||||
async def create_and_execute_turn(
|
||||
self, request: AgentTurnCreateRequest
|
||||
) -> AsyncGenerator:
|
||||
|
@ -209,7 +228,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
steps = []
|
||||
output_message = None
|
||||
async for chunk in self.run(
|
||||
session=session,
|
||||
session_id=request.session_id,
|
||||
turn_id=turn_id,
|
||||
input_messages=messages,
|
||||
attachments=request.attachments or [],
|
||||
|
@ -261,7 +280,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
async def run(
|
||||
self,
|
||||
session: Session,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
input_messages: List[Message],
|
||||
attachments: List[Attachment],
|
||||
|
@ -282,7 +301,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
yield res
|
||||
|
||||
async for res in self._run(
|
||||
session, turn_id, input_messages, attachments, sampling_params, stream
|
||||
session_id, turn_id, input_messages, attachments, sampling_params, stream
|
||||
):
|
||||
if isinstance(res, bool):
|
||||
return
|
||||
|
@ -364,7 +383,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
async def _run(
|
||||
self,
|
||||
session: Session,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
input_messages: List[Message],
|
||||
attachments: List[Attachment],
|
||||
|
@ -389,7 +408,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
# TODO: find older context from the session and either replace it
|
||||
# or append with a sliding window. this is really a very simplistic implementation
|
||||
rag_context, bank_ids = await self._retrieve_context(
|
||||
session, input_messages, attachments
|
||||
session_id, input_messages, attachments
|
||||
)
|
||||
|
||||
step_id = str(uuid.uuid4())
|
||||
|
@ -645,17 +664,25 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
|
||||
n_iter += 1
|
||||
|
||||
async def _ensure_memory_bank(self, session: Session) -> MemoryBank:
|
||||
if session.memory_bank is None:
|
||||
session.memory_bank = await self.memory_api.create_memory_bank(
|
||||
name=f"memory_bank_{session.session_id}",
|
||||
async def _ensure_memory_bank(self, session_id: str) -> str:
|
||||
session_info = await self._get_session_info(session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {session_id} not found")
|
||||
|
||||
if session_info.memory_bank_id is None:
|
||||
memory_bank = await self.memory_api.create_memory_bank(
|
||||
name=f"memory_bank_{session_id}",
|
||||
config=VectorMemoryBankConfig(
|
||||
embedding_model="sentence-transformer/all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=512,
|
||||
),
|
||||
)
|
||||
bank_id = memory_bank.bank_id
|
||||
await self._add_memory_bank_to_session(session_id, bank_id)
|
||||
else:
|
||||
bank_id = session_info.memory_bank_id
|
||||
|
||||
return session.memory_bank
|
||||
return bank_id
|
||||
|
||||
async def _should_retrieve_context(
|
||||
self, messages: List[Message], attachments: List[Attachment]
|
||||
|
@ -680,7 +707,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
return None
|
||||
|
||||
async def _retrieve_context(
|
||||
self, session: Session, messages: List[Message], attachments: List[Attachment]
|
||||
self, session_id: str, messages: List[Message], attachments: List[Attachment]
|
||||
) -> Tuple[List[str], List[int]]: # (rag_context, bank_ids)
|
||||
bank_ids = []
|
||||
|
||||
|
@ -689,8 +716,8 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
bank_ids.extend(c.bank_id for c in memory.memory_bank_configs)
|
||||
|
||||
if attachments:
|
||||
bank = await self._ensure_memory_bank(session)
|
||||
bank_ids.append(bank.bank_id)
|
||||
bank_id = await self._ensure_memory_bank(session_id)
|
||||
bank_ids.append(bank_id)
|
||||
|
||||
documents = [
|
||||
MemoryBankDocument(
|
||||
|
@ -701,9 +728,11 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
)
|
||||
for a in attachments
|
||||
]
|
||||
await self.memory_api.insert_documents(bank.bank_id, documents)
|
||||
elif session.memory_bank:
|
||||
bank_ids.append(session.memory_bank.bank_id)
|
||||
await self.memory_api.insert_documents(bank_id, documents)
|
||||
else:
|
||||
session_info = await self._get_session_info(session_id)
|
||||
if session_info.memory_bank_id:
|
||||
bank_ids.append(session_info.memory_bank_id)
|
||||
|
||||
if not bank_ids:
|
||||
# this can happen if the per-session memory bank is not yet populated
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import json
|
||||
import logging
|
||||
import uuid
|
||||
from typing import AsyncGenerator
|
||||
|
@ -14,7 +14,7 @@ from llama_stack.apis.memory import Memory
|
|||
from llama_stack.apis.safety import Safety
|
||||
from llama_stack.apis.agents import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
|
||||
|
||||
from .agent_instance import ChatAgent
|
||||
from .config import MetaReferenceAgentsImplConfig
|
||||
|
@ -35,6 +35,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.inference_api = inference_api
|
||||
self.memory_api = memory_api
|
||||
self.safety_api = safety_api
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
|
||||
async def initialize(self) -> None:
|
||||
self.persistence_store = await kvstore_impl(self.config.persistence_store)
|
||||
|
@ -47,7 +48,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
|
||||
await self.persistence_store.set(
|
||||
key=f"agent:{agent_id}",
|
||||
value=json.dumps(agent_config.dict()),
|
||||
value=agent_config.json(),
|
||||
)
|
||||
return AgentCreateResponse(
|
||||
agent_id=agent_id,
|
||||
|
@ -80,7 +81,11 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
inference_api=self.inference_api,
|
||||
safety_api=self.safety_api,
|
||||
memory_api=self.memory_api,
|
||||
persistence_store=self.persistence_store,
|
||||
persistence_store=(
|
||||
self.persistence_store
|
||||
if agent_config.enable_session_persistence
|
||||
else self.in_memory_store
|
||||
),
|
||||
)
|
||||
|
||||
async def create_agent_session(
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.kvstore import KVStoreConfig
|
||||
|
||||
|
||||
|
|
|
@ -184,6 +184,7 @@ async def chat_agent(mock_inference_api, mock_safety_api, mock_memory_api):
|
|||
# ),
|
||||
],
|
||||
tool_choice=ToolChoice.auto,
|
||||
enable_session_persistence=False,
|
||||
input_shields=[],
|
||||
output_shields=[],
|
||||
)
|
||||
|
|
|
@ -42,7 +42,6 @@ class FaissIndex(EmbeddingIndex):
|
|||
indexlen = len(self.id_by_index)
|
||||
for i, chunk in enumerate(chunks):
|
||||
self.chunk_by_index[indexlen + i] = chunk
|
||||
logger.info(f"Adding chunk #{indexlen + i} tokens={chunk.token_count}")
|
||||
self.id_by_index[indexlen + i] = chunk.document_id
|
||||
|
||||
self.index.add(np.array(embeddings).astype(np.float32))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue