[memory refactor][5/n] Migrate all vector_io providers (#835)

See https://github.com/meta-llama/llama-stack/issues/827 for the broader
design.

This PR finishes off all the stragglers and migrates everything to the
new vector_io / vector_dbs naming.
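
As a quick reference, here is a before/after sketch of the client-side calls this
rename touches, distilled from the UI changes in the diff below. It is only a sketch:
it assumes an already-constructed client object (client), a provider id discovered
from client.providers.list() whose api is "vector_io" (vector_io_provider), and a
list of Document objects (documents); the identifier "rag_vector_db" is just an
example value.

# Old naming (removed in this commit):
#   client.memory_banks.register(
#       memory_bank_id="rag_vector_db",
#       params={
#           "memory_bank_type": "vector",
#           "embedding_model": "all-MiniLM-L6-v2",
#           "chunk_size_in_tokens": 512,
#           "overlap_size_in_tokens": 64,
#       },
#       provider_id=memory_provider,
#   )
#   client.memory.insert(bank_id="rag_vector_db", documents=documents)

# New naming (this commit):
client.vector_dbs.register(
    vector_db_id="rag_vector_db",        # user-chosen identifier
    embedding_model="all-MiniLM-L6-v2",  # embedding model backing this vector db
    embedding_dimension=384,             # dimension of the chosen embedding model
    provider_id=vector_io_provider,      # a provider whose api == "vector_io"
)
client.tool_runtime.rag_tool.insert(
    vector_db_id="rag_vector_db",        # insert documents into the vector db
    documents=documents,
)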
Ashwin Bharambe 2025-01-22 10:17:59 -08:00 committed by GitHub
parent 63f37f9b7c
commit c9e5578151
78 changed files with 504 additions and 623 deletions


@@ -9,7 +9,7 @@ import os
 import pytest
 import pytest_asyncio
 from llama_stack.apis.inference import Model
-from llama_stack.apis.memory_banks import VectorMemoryBank
+from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.distribution.store.registry import (
     CachedDiskDistributionRegistry,
@@ -42,13 +42,12 @@ async def cached_registry(config):
 @pytest.fixture
-def sample_bank():
-    return VectorMemoryBank(
-        identifier="test_bank",
+def sample_vector_db():
+    return VectorDB(
+        identifier="test_vector_db",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=512,
-        overlap_size_in_tokens=64,
-        provider_resource_id="test_bank",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db",
         provider_id="test-provider",
     )
@@ -70,19 +69,17 @@ async def test_registry_initialization(registry):
 @pytest.mark.asyncio
-async def test_basic_registration(registry, sample_bank, sample_model):
-    print(f"Registering {sample_bank}")
-    await registry.register(sample_bank)
+async def test_basic_registration(registry, sample_vector_db, sample_model):
+    print(f"Registering {sample_vector_db}")
+    await registry.register(sample_vector_db)
     print(f"Registering {sample_model}")
     await registry.register(sample_model)
-    print("Getting bank")
-    result_bank = await registry.get("memory_bank", "test_bank")
-    assert result_bank is not None
-    assert result_bank.identifier == sample_bank.identifier
-    assert result_bank.embedding_model == sample_bank.embedding_model
-    assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
-    assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
-    assert result_bank.provider_id == sample_bank.provider_id
+    print("Getting vector_db")
+    result_vector_db = await registry.get("vector_db", "test_vector_db")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == sample_vector_db.identifier
+    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
+    assert result_vector_db.provider_id == sample_vector_db.provider_id
     result_model = await registry.get("model", "test_model")
     assert result_model is not None
@@ -91,24 +88,23 @@ async def test_basic_registration(registry, sample_bank, sample_model):
 @pytest.mark.asyncio
-async def test_cached_registry_initialization(config, sample_bank, sample_model):
+async def test_cached_registry_initialization(config, sample_vector_db, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(await kvstore_impl(config))
     await disk_registry.initialize()
-    await disk_registry.register(sample_bank)
+    await disk_registry.register(sample_vector_db)
     await disk_registry.register(sample_model)
     # Test cached version loads from disk
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    result_bank = await cached_registry.get("memory_bank", "test_bank")
-    assert result_bank is not None
-    assert result_bank.identifier == sample_bank.identifier
-    assert result_bank.embedding_model == sample_bank.embedding_model
-    assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
-    assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
-    assert result_bank.provider_id == sample_bank.provider_id
+    result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == sample_vector_db.identifier
+    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
+    assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
+    assert result_vector_db.provider_id == sample_vector_db.provider_id
 @pytest.mark.asyncio
@@ -116,29 +112,28 @@ async def test_cached_registry_updates(config):
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    new_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    new_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=256,
-        overlap_size_in_tokens=32,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",
     )
-    await cached_registry.register(new_bank)
+    await cached_registry.register(new_vector_db)
     # Verify in cache
-    result_bank = await cached_registry.get("memory_bank", "test_bank_2")
-    assert result_bank is not None
-    assert result_bank.identifier == new_bank.identifier
-    assert result_bank.provider_id == new_bank.provider_id
+    result_vector_db = await cached_registry.get("vector_db", "test_vector_db_2")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == new_vector_db.identifier
+    assert result_vector_db.provider_id == new_vector_db.provider_id
     # Verify persisted to disk
     new_registry = DiskDistributionRegistry(await kvstore_impl(config))
     await new_registry.initialize()
-    result_bank = await new_registry.get("memory_bank", "test_bank_2")
-    assert result_bank is not None
-    assert result_bank.identifier == new_bank.identifier
-    assert result_bank.provider_id == new_bank.provider_id
+    result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == new_vector_db.identifier
+    assert result_vector_db.provider_id == new_vector_db.provider_id
 @pytest.mark.asyncio
@@ -146,30 +141,28 @@ async def test_duplicate_provider_registration(config):
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    original_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    original_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=256,
-        overlap_size_in_tokens=32,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",
     )
-    await cached_registry.register(original_bank)
+    await cached_registry.register(original_vector_db)
-    duplicate_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    duplicate_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="different-model",
-        chunk_size_in_tokens=128,
-        overlap_size_in_tokens=16,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",  # Same provider_id
     )
-    await cached_registry.register(duplicate_bank)
+    await cached_registry.register(duplicate_vector_db)
-    result = await cached_registry.get("memory_bank", "test_bank_2")
+    result = await cached_registry.get("vector_db", "test_vector_db_2")
     assert result is not None
     assert (
-        result.embedding_model == original_bank.embedding_model
+        result.embedding_model == original_vector_db.embedding_model
     )  # Original values preserved
@@ -179,36 +172,35 @@ async def test_get_all_objects(config):
     await cached_registry.initialize()
     # Create multiple test banks
-    test_banks = [
-        VectorMemoryBank(
-            identifier=f"test_bank_{i}",
+    test_vector_dbs = [
+        VectorDB(
+            identifier=f"test_vector_db_{i}",
             embedding_model="all-MiniLM-L6-v2",
-            chunk_size_in_tokens=256,
-            overlap_size_in_tokens=32,
-            provider_resource_id=f"test_bank_{i}",
+            embedding_dimension=384,
+            provider_resource_id=f"test_vector_db_{i}",
            provider_id=f"provider_{i}",
         )
         for i in range(3)
     ]
-    # Register all banks
-    for bank in test_banks:
-        await cached_registry.register(bank)
+    # Register all vector_dbs
+    for vector_db in test_vector_dbs:
+        await cached_registry.register(vector_db)
     # Test get_all retrieval
     all_results = await cached_registry.get_all()
     assert len(all_results) == 3
-    # Verify each bank was stored correctly
-    for original_bank in test_banks:
-        matching_banks = [
-            b for b in all_results if b.identifier == original_bank.identifier
+    # Verify each vector_db was stored correctly
+    for original_vector_db in test_vector_dbs:
+        matching_vector_dbs = [
+            v for v in all_results if v.identifier == original_vector_db.identifier
         ]
-        assert len(matching_banks) == 1
-        stored_bank = matching_banks[0]
-        assert stored_bank.embedding_model == original_bank.embedding_model
-        assert stored_bank.provider_id == original_bank.provider_id
-        assert stored_bank.chunk_size_in_tokens == original_bank.chunk_size_in_tokens
+        assert len(matching_vector_dbs) == 1
+        stored_vector_db = matching_vector_dbs[0]
+        assert stored_vector_db.embedding_model == original_vector_db.embedding_model
+        assert stored_vector_db.provider_id == original_vector_db.provider_id
         assert (
-            stored_bank.overlap_size_in_tokens == original_bank.overlap_size_in_tokens
+            stored_vector_db.embedding_dimension
+            == original_vector_db.embedding_dimension
        )


@@ -1,23 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-import streamlit as st
-from modules.api import llama_stack_api
-def memory_banks():
-    st.header("Memory Banks")
-    memory_banks_info = {
-        m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
-    }
-    if len(memory_banks_info) > 0:
-        selected_memory_bank = st.selectbox(
-            "Select a memory bank", list(memory_banks_info.keys())
-        )
-        st.json(memory_banks_info[selected_memory_bank])
-    else:
-        st.info("No memory banks found")


@@ -6,10 +6,10 @@
 from page.distribution.datasets import datasets
 from page.distribution.eval_tasks import eval_tasks
-from page.distribution.memory_banks import memory_banks
 from page.distribution.models import models
 from page.distribution.scoring_functions import scoring_functions
 from page.distribution.shields import shields
+from page.distribution.vector_dbs import vector_dbs
 from streamlit_option_menu import option_menu
@@ -17,7 +17,7 @@ from streamlit_option_menu import option_menu
 def resources_page():
     options = [
         "Models",
-        "Memory Banks",
+        "Vector Databases",
         "Shields",
         "Scoring Functions",
         "Datasets",
@@ -37,8 +37,8 @@ def resources_page():
     )
     if selected_resource == "Eval Tasks":
         eval_tasks()
-    elif selected_resource == "Memory Banks":
-        memory_banks()
+    elif selected_resource == "Vector Databases":
+        vector_dbs()
     elif selected_resource == "Datasets":
         datasets()
     elif selected_resource == "Models":


@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import streamlit as st
+from modules.api import llama_stack_api
+def vector_dbs():
+    st.header("Vector Databases")
+    vector_dbs_info = {
+        v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()
+    }
+    if len(vector_dbs_info) > 0:
+        selected_vector_db = st.selectbox(
+            "Select a vector database", list(vector_dbs_info.keys())
+        )
+        st.json(vector_dbs_info[selected_vector_db])
+    else:
+        st.info("No vector databases found")


@@ -29,12 +29,12 @@ def rag_chat_page():
     if uploaded_files:
         st.success(f"Successfully uploaded {len(uploaded_files)} files")
         # Add memory bank name input field
-        memory_bank_name = st.text_input(
-            "Memory Bank Name",
-            value="rag_bank",
-            help="Enter a unique identifier for this memory bank",
+        vector_db_name = st.text_input(
+            "Vector Database Name",
+            value="rag_vector_db",
+            help="Enter a unique identifier for this vector database",
         )
-        if st.button("Create Memory Bank"):
+        if st.button("Create Vector Database"):
             documents = [
                 Document(
                     document_id=uploaded_file.name,
@@ -44,37 +44,33 @@ def rag_chat_page():
             ]
             providers = llama_stack_api.client.providers.list()
-            memory_provider = None
+            vector_io_provider = None
             for x in providers:
-                if x.api == "memory":
-                    memory_provider = x.provider_id
+                if x.api == "vector_io":
+                    vector_io_provider = x.provider_id
-            llama_stack_api.client.memory_banks.register(
-                memory_bank_id=memory_bank_name,  # Use the user-provided name
-                params={
-                    "memory_bank_type": "vector",
-                    "embedding_model": "all-MiniLM-L6-v2",
-                    "chunk_size_in_tokens": 512,
-                    "overlap_size_in_tokens": 64,
-                },
-                provider_id=memory_provider,
+            llama_stack_api.client.vector_dbs.register(
+                vector_db_id=vector_db_name,  # Use the user-provided name
+                embedding_dimension=384,
+                embedding_model="all-MiniLM-L6-v2",
+                provider_id=vector_io_provider,
             )
-            # insert documents using the custom bank name
-            llama_stack_api.client.memory.insert(
-                bank_id=memory_bank_name,  # Use the user-provided name
+            # insert documents using the custom vector db name
+            llama_stack_api.client.tool_runtime.rag_tool.insert(
+                vector_db_id=vector_db_name,  # Use the user-provided name
                 documents=documents,
             )
-            st.success("Memory bank created successfully!")
+            st.success("Vector database created successfully!")
     st.subheader("Configure Agent")
     # select memory banks
-    memory_banks = llama_stack_api.client.memory_banks.list()
-    memory_banks = [bank.identifier for bank in memory_banks]
-    selected_memory_banks = st.multiselect(
-        "Select Memory Banks",
-        memory_banks,
+    vector_dbs = llama_stack_api.client.vector_dbs.list()
+    vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
+    selected_vector_dbs = st.multiselect(
+        "Select Vector Databases",
+        vector_dbs,
     )
     available_models = llama_stack_api.client.models.list()
@@ -141,14 +137,14 @@ def rag_chat_page():
             dict(
                 name="builtin::memory",
                 args={
-                    "memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
+                    "vector_db_ids": [
+                        vector_db_id for vector_db_id in selected_vector_dbs
+                    ],
                 },
             )
         ],
         tool_choice="auto",
         tool_prompt_format="json",
         input_shields=[],
         output_shields=[],
         enable_session_persistence=False,
     )