mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-01 20:18:50 +00:00
[memory refactor][6/n] Update naming and routes (#839)
Making a few small naming changes as per feedback:
- RAGToolRuntime methods are now called `insert` and `query` to keep them more general
- The tool names are changed to the non-namespaced forms `insert_into_memory` and `query_from_memory`
- The REST endpoints are more RESTful
This commit is contained in:
parent
c9e5578151
commit
a63a43c646
11 changed files with 319 additions and 330 deletions
|
@ -84,7 +84,7 @@ def make_random_string(length: int = 8):
|
|||
|
||||
|
||||
TOOLS_ATTACHMENT_KEY_REGEX = re.compile(r"__tools_attachment__=(\{.*?\})")
|
||||
MEMORY_QUERY_TOOL = "rag_tool.query_context"
|
||||
MEMORY_QUERY_TOOL = "query_from_memory"
|
||||
WEB_SEARCH_TOOL = "web_search"
|
||||
MEMORY_GROUP = "builtin::memory"
|
||||
|
||||
|
@ -432,16 +432,16 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
)
|
||||
)
|
||||
)
|
||||
result = await self.tool_runtime_api.rag_tool.query_context(
|
||||
result = await self.tool_runtime_api.rag_tool.query(
|
||||
content=concat_interleaved_content(
|
||||
[msg.content for msg in input_messages]
|
||||
),
|
||||
vector_db_ids=vector_db_ids,
|
||||
query_config=RAGQueryConfig(
|
||||
query_generator_config=DefaultRAGQueryGeneratorConfig(),
|
||||
max_tokens_in_context=4096,
|
||||
max_chunks=5,
|
||||
),
|
||||
vector_db_ids=vector_db_ids,
|
||||
)
|
||||
retrieved_context = result.content
|
||||
|
||||
|
@ -882,7 +882,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
)
|
||||
for a in data
|
||||
]
|
||||
await self.tool_runtime_api.rag_tool.insert_documents(
|
||||
await self.tool_runtime_api.rag_tool.insert(
|
||||
documents=documents,
|
||||
vector_db_id=vector_db_id,
|
||||
chunk_size_in_tokens=512,
|
||||
|
|
|
@ -61,7 +61,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
|||
async def shutdown(self):
|
||||
pass
|
||||
|
||||
async def insert_documents(
|
||||
async def insert(
|
||||
self,
|
||||
documents: List[RAGDocument],
|
||||
vector_db_id: str,
|
||||
|
@ -87,15 +87,16 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
|||
vector_db_id=vector_db_id,
|
||||
)
|
||||
|
||||
async def query_context(
|
||||
async def query(
|
||||
self,
|
||||
content: InterleavedContent,
|
||||
query_config: RAGQueryConfig,
|
||||
vector_db_ids: List[str],
|
||||
query_config: Optional[RAGQueryConfig] = None,
|
||||
) -> RAGQueryResult:
|
||||
if not vector_db_ids:
|
||||
return RAGQueryResult(content=None)
|
||||
|
||||
query_config = query_config or RAGQueryConfig()
|
||||
query = await generate_rag_query(
|
||||
query_config.query_generator_config,
|
||||
content,
|
||||
|
@ -159,11 +160,11 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
|||
# encountering fatals.
|
||||
return [
|
||||
ToolDef(
|
||||
name="rag_tool.query_context",
|
||||
name="query_from_memory",
|
||||
description="Retrieve context from memory",
|
||||
),
|
||||
ToolDef(
|
||||
name="rag_tool.insert_documents",
|
||||
name="insert_into_memory",
|
||||
description="Insert documents into memory",
|
||||
),
|
||||
]
|
||||
|
|
|
@ -96,14 +96,14 @@ class TestTools:
|
|||
)
|
||||
|
||||
# Insert documents into memory
|
||||
await tools_impl.rag_tool.insert_documents(
|
||||
await tools_impl.rag_tool.insert(
|
||||
documents=sample_documents,
|
||||
vector_db_id="test_bank",
|
||||
chunk_size_in_tokens=512,
|
||||
)
|
||||
|
||||
# Execute the memory tool
|
||||
response = await tools_impl.rag_tool.query_context(
|
||||
response = await tools_impl.rag_tool.query(
|
||||
content="What are the main topics covered in the documentation?",
|
||||
vector_db_ids=["test_bank"],
|
||||
)
|
||||
|
|
|
@ -11,11 +11,9 @@ from pathlib import Path
|
|||
|
||||
import pytest
|
||||
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
content_from_doc,
|
||||
MemoryBankDocument,
|
||||
URL,
|
||||
)
|
||||
from llama_stack.apis.tools import RAGDocument
|
||||
|
||||
from llama_stack.providers.utils.memory.vector_store import content_from_doc, URL
|
||||
|
||||
DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
|
||||
|
||||
|
@ -41,33 +39,33 @@ class TestVectorStore:
|
|||
@pytest.mark.asyncio
|
||||
async def test_returns_content_from_pdf_data_uri(self):
|
||||
data_uri = data_url_from_file(DUMMY_PDF_PATH)
|
||||
doc = MemoryBankDocument(
|
||||
doc = RAGDocument(
|
||||
document_id="dummy",
|
||||
content=data_uri,
|
||||
mime_type="application/pdf",
|
||||
metadata={},
|
||||
)
|
||||
content = await content_from_doc(doc)
|
||||
assert content == "Dummy PDF file"
|
||||
assert content == "Dumm y PDF file"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_downloads_pdf_and_returns_content(self):
|
||||
# Using GitHub to host the PDF file
|
||||
url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
|
||||
doc = MemoryBankDocument(
|
||||
doc = RAGDocument(
|
||||
document_id="dummy",
|
||||
content=url,
|
||||
mime_type="application/pdf",
|
||||
metadata={},
|
||||
)
|
||||
content = await content_from_doc(doc)
|
||||
assert content == "Dummy PDF file"
|
||||
assert content == "Dumm y PDF file"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_downloads_pdf_and_returns_content_with_url_object(self):
|
||||
# Using GitHub to host the PDF file
|
||||
url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
|
||||
doc = MemoryBankDocument(
|
||||
doc = RAGDocument(
|
||||
document_id="dummy",
|
||||
content=URL(
|
||||
uri=url,
|
||||
|
@ -76,4 +74,4 @@ class TestVectorStore:
|
|||
metadata={},
|
||||
)
|
||||
content = await content_from_doc(doc)
|
||||
assert content == "Dummy PDF file"
|
||||
assert content == "Dumm y PDF file"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue