mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
[memory refactor][5/n] Migrate all vector_io providers (#835)
See https://github.com/meta-llama/llama-stack/issues/827 for the broader design. This PR finishes off all the stragglers and migrates everything to the new naming.
This commit is contained in:
parent
63f37f9b7c
commit
c9e5578151
78 changed files with 504 additions and 623 deletions
|
@ -8,11 +8,11 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::nvidia` |
|
||||
| memory | `inline::faiss` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss` |
|
||||
|
||||
|
||||
### Environment Variables
|
||||
|
|
|
@ -15,11 +15,11 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::bedrock` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `remote::bedrock` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -8,11 +8,11 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::cerebras` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
### Environment Variables
|
||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::fireworks` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
### Environment Variables
|
||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `inline::meta-reference` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
|
||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `inline::meta-reference-quantized` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
|
||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::ollama` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.### Environment Variables
|
||||
|
|
|
@ -17,11 +17,11 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::vllm` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.
|
||||
|
|
|
@ -19,11 +19,11 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::tgi` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.
|
||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-together` distribution consists of the following pr
|
|||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::together` |
|
||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
||||
|
||||
### Environment Variables
|
||||
|
|
|
@ -88,7 +88,7 @@ class MemoryRetrievalStep(StepCommon):
|
|||
step_type: Literal[StepType.memory_retrieval.value] = (
|
||||
StepType.memory_retrieval.value
|
||||
)
|
||||
memory_bank_ids: List[str]
|
||||
vector_db_ids: str
|
||||
inserted_context: InterleavedContent
|
||||
|
||||
|
||||
|
|
|
@ -208,7 +208,7 @@ class EventLogger:
|
|||
):
|
||||
details = event.payload.step_details
|
||||
inserted_context = interleaved_content_as_str(details.inserted_context)
|
||||
content = f"fetched {len(inserted_context)} bytes from {details.memory_bank_ids}"
|
||||
content = f"fetched {len(inserted_context)} bytes from {details.vector_db_ids}"
|
||||
|
||||
yield (
|
||||
event,
|
||||
|
|
|
@ -37,5 +37,5 @@ class Resource(BaseModel):
|
|||
provider_id: str = Field(description="ID of the provider that owns this resource")
|
||||
|
||||
type: ResourceType = Field(
|
||||
description="Type of resource (e.g. 'model', 'shield', 'memory_bank', etc.)"
|
||||
description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)"
|
||||
)
|
||||
|
|
|
@ -9,7 +9,7 @@ import os
|
|||
import pytest
|
||||
import pytest_asyncio
|
||||
from llama_stack.apis.inference import Model
|
||||
from llama_stack.apis.memory_banks import VectorMemoryBank
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
|
||||
from llama_stack.distribution.store.registry import (
|
||||
CachedDiskDistributionRegistry,
|
||||
|
@ -42,13 +42,12 @@ async def cached_registry(config):
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_bank():
|
||||
return VectorMemoryBank(
|
||||
identifier="test_bank",
|
||||
def sample_vector_db():
|
||||
return VectorDB(
|
||||
identifier="test_vector_db",
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=512,
|
||||
overlap_size_in_tokens=64,
|
||||
provider_resource_id="test_bank",
|
||||
embedding_dimension=384,
|
||||
provider_resource_id="test_vector_db",
|
||||
provider_id="test-provider",
|
||||
)
|
||||
|
||||
|
@ -70,19 +69,17 @@ async def test_registry_initialization(registry):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_basic_registration(registry, sample_bank, sample_model):
|
||||
print(f"Registering {sample_bank}")
|
||||
await registry.register(sample_bank)
|
||||
async def test_basic_registration(registry, sample_vector_db, sample_model):
|
||||
print(f"Registering {sample_vector_db}")
|
||||
await registry.register(sample_vector_db)
|
||||
print(f"Registering {sample_model}")
|
||||
await registry.register(sample_model)
|
||||
print("Getting bank")
|
||||
result_bank = await registry.get("memory_bank", "test_bank")
|
||||
assert result_bank is not None
|
||||
assert result_bank.identifier == sample_bank.identifier
|
||||
assert result_bank.embedding_model == sample_bank.embedding_model
|
||||
assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
|
||||
assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
|
||||
assert result_bank.provider_id == sample_bank.provider_id
|
||||
print("Getting vector_db")
|
||||
result_vector_db = await registry.get("vector_db", "test_vector_db")
|
||||
assert result_vector_db is not None
|
||||
assert result_vector_db.identifier == sample_vector_db.identifier
|
||||
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
|
||||
assert result_vector_db.provider_id == sample_vector_db.provider_id
|
||||
|
||||
result_model = await registry.get("model", "test_model")
|
||||
assert result_model is not None
|
||||
|
@ -91,24 +88,23 @@ async def test_basic_registration(registry, sample_bank, sample_model):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cached_registry_initialization(config, sample_bank, sample_model):
|
||||
async def test_cached_registry_initialization(config, sample_vector_db, sample_model):
|
||||
# First populate the disk registry
|
||||
disk_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
||||
await disk_registry.initialize()
|
||||
await disk_registry.register(sample_bank)
|
||||
await disk_registry.register(sample_vector_db)
|
||||
await disk_registry.register(sample_model)
|
||||
|
||||
# Test cached version loads from disk
|
||||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||
await cached_registry.initialize()
|
||||
|
||||
result_bank = await cached_registry.get("memory_bank", "test_bank")
|
||||
assert result_bank is not None
|
||||
assert result_bank.identifier == sample_bank.identifier
|
||||
assert result_bank.embedding_model == sample_bank.embedding_model
|
||||
assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
|
||||
assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
|
||||
assert result_bank.provider_id == sample_bank.provider_id
|
||||
result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
|
||||
assert result_vector_db is not None
|
||||
assert result_vector_db.identifier == sample_vector_db.identifier
|
||||
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
|
||||
assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
|
||||
assert result_vector_db.provider_id == sample_vector_db.provider_id
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -116,29 +112,28 @@ async def test_cached_registry_updates(config):
|
|||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||
await cached_registry.initialize()
|
||||
|
||||
new_bank = VectorMemoryBank(
|
||||
identifier="test_bank_2",
|
||||
new_vector_db = VectorDB(
|
||||
identifier="test_vector_db_2",
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=256,
|
||||
overlap_size_in_tokens=32,
|
||||
provider_resource_id="test_bank_2",
|
||||
embedding_dimension=384,
|
||||
provider_resource_id="test_vector_db_2",
|
||||
provider_id="baz",
|
||||
)
|
||||
await cached_registry.register(new_bank)
|
||||
await cached_registry.register(new_vector_db)
|
||||
|
||||
# Verify in cache
|
||||
result_bank = await cached_registry.get("memory_bank", "test_bank_2")
|
||||
assert result_bank is not None
|
||||
assert result_bank.identifier == new_bank.identifier
|
||||
assert result_bank.provider_id == new_bank.provider_id
|
||||
result_vector_db = await cached_registry.get("vector_db", "test_vector_db_2")
|
||||
assert result_vector_db is not None
|
||||
assert result_vector_db.identifier == new_vector_db.identifier
|
||||
assert result_vector_db.provider_id == new_vector_db.provider_id
|
||||
|
||||
# Verify persisted to disk
|
||||
new_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
||||
await new_registry.initialize()
|
||||
result_bank = await new_registry.get("memory_bank", "test_bank_2")
|
||||
assert result_bank is not None
|
||||
assert result_bank.identifier == new_bank.identifier
|
||||
assert result_bank.provider_id == new_bank.provider_id
|
||||
result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
|
||||
assert result_vector_db is not None
|
||||
assert result_vector_db.identifier == new_vector_db.identifier
|
||||
assert result_vector_db.provider_id == new_vector_db.provider_id
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
@ -146,30 +141,28 @@ async def test_duplicate_provider_registration(config):
|
|||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||
await cached_registry.initialize()
|
||||
|
||||
original_bank = VectorMemoryBank(
|
||||
identifier="test_bank_2",
|
||||
original_vector_db = VectorDB(
|
||||
identifier="test_vector_db_2",
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=256,
|
||||
overlap_size_in_tokens=32,
|
||||
provider_resource_id="test_bank_2",
|
||||
embedding_dimension=384,
|
||||
provider_resource_id="test_vector_db_2",
|
||||
provider_id="baz",
|
||||
)
|
||||
await cached_registry.register(original_bank)
|
||||
await cached_registry.register(original_vector_db)
|
||||
|
||||
duplicate_bank = VectorMemoryBank(
|
||||
identifier="test_bank_2",
|
||||
duplicate_vector_db = VectorDB(
|
||||
identifier="test_vector_db_2",
|
||||
embedding_model="different-model",
|
||||
chunk_size_in_tokens=128,
|
||||
overlap_size_in_tokens=16,
|
||||
provider_resource_id="test_bank_2",
|
||||
embedding_dimension=384,
|
||||
provider_resource_id="test_vector_db_2",
|
||||
provider_id="baz", # Same provider_id
|
||||
)
|
||||
await cached_registry.register(duplicate_bank)
|
||||
await cached_registry.register(duplicate_vector_db)
|
||||
|
||||
result = await cached_registry.get("memory_bank", "test_bank_2")
|
||||
result = await cached_registry.get("vector_db", "test_vector_db_2")
|
||||
assert result is not None
|
||||
assert (
|
||||
result.embedding_model == original_bank.embedding_model
|
||||
result.embedding_model == original_vector_db.embedding_model
|
||||
) # Original values preserved
|
||||
|
||||
|
||||
|
@ -179,36 +172,35 @@ async def test_get_all_objects(config):
|
|||
await cached_registry.initialize()
|
||||
|
||||
# Create multiple test banks
|
||||
test_banks = [
|
||||
VectorMemoryBank(
|
||||
identifier=f"test_bank_{i}",
|
||||
test_vector_dbs = [
|
||||
VectorDB(
|
||||
identifier=f"test_vector_db_{i}",
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=256,
|
||||
overlap_size_in_tokens=32,
|
||||
provider_resource_id=f"test_bank_{i}",
|
||||
embedding_dimension=384,
|
||||
provider_resource_id=f"test_vector_db_{i}",
|
||||
provider_id=f"provider_{i}",
|
||||
)
|
||||
for i in range(3)
|
||||
]
|
||||
|
||||
# Register all banks
|
||||
for bank in test_banks:
|
||||
await cached_registry.register(bank)
|
||||
# Register all vector_dbs
|
||||
for vector_db in test_vector_dbs:
|
||||
await cached_registry.register(vector_db)
|
||||
|
||||
# Test get_all retrieval
|
||||
all_results = await cached_registry.get_all()
|
||||
assert len(all_results) == 3
|
||||
|
||||
# Verify each bank was stored correctly
|
||||
for original_bank in test_banks:
|
||||
matching_banks = [
|
||||
b for b in all_results if b.identifier == original_bank.identifier
|
||||
# Verify each vector_db was stored correctly
|
||||
for original_vector_db in test_vector_dbs:
|
||||
matching_vector_dbs = [
|
||||
v for v in all_results if v.identifier == original_vector_db.identifier
|
||||
]
|
||||
assert len(matching_banks) == 1
|
||||
stored_bank = matching_banks[0]
|
||||
assert stored_bank.embedding_model == original_bank.embedding_model
|
||||
assert stored_bank.provider_id == original_bank.provider_id
|
||||
assert stored_bank.chunk_size_in_tokens == original_bank.chunk_size_in_tokens
|
||||
assert len(matching_vector_dbs) == 1
|
||||
stored_vector_db = matching_vector_dbs[0]
|
||||
assert stored_vector_db.embedding_model == original_vector_db.embedding_model
|
||||
assert stored_vector_db.provider_id == original_vector_db.provider_id
|
||||
assert (
|
||||
stored_bank.overlap_size_in_tokens == original_bank.overlap_size_in_tokens
|
||||
stored_vector_db.embedding_dimension
|
||||
== original_vector_db.embedding_dimension
|
||||
)
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def memory_banks():
|
||||
st.header("Memory Banks")
|
||||
memory_banks_info = {
|
||||
m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
|
||||
}
|
||||
|
||||
if len(memory_banks_info) > 0:
|
||||
selected_memory_bank = st.selectbox(
|
||||
"Select a memory bank", list(memory_banks_info.keys())
|
||||
)
|
||||
st.json(memory_banks_info[selected_memory_bank])
|
||||
else:
|
||||
st.info("No memory banks found")
|
|
@ -6,10 +6,10 @@
|
|||
|
||||
from page.distribution.datasets import datasets
|
||||
from page.distribution.eval_tasks import eval_tasks
|
||||
from page.distribution.memory_banks import memory_banks
|
||||
from page.distribution.models import models
|
||||
from page.distribution.scoring_functions import scoring_functions
|
||||
from page.distribution.shields import shields
|
||||
from page.distribution.vector_dbs import vector_dbs
|
||||
|
||||
from streamlit_option_menu import option_menu
|
||||
|
||||
|
@ -17,7 +17,7 @@ from streamlit_option_menu import option_menu
|
|||
def resources_page():
|
||||
options = [
|
||||
"Models",
|
||||
"Memory Banks",
|
||||
"Vector Databases",
|
||||
"Shields",
|
||||
"Scoring Functions",
|
||||
"Datasets",
|
||||
|
@ -37,8 +37,8 @@ def resources_page():
|
|||
)
|
||||
if selected_resource == "Eval Tasks":
|
||||
eval_tasks()
|
||||
elif selected_resource == "Memory Banks":
|
||||
memory_banks()
|
||||
elif selected_resource == "Vector Databases":
|
||||
vector_dbs()
|
||||
elif selected_resource == "Datasets":
|
||||
datasets()
|
||||
elif selected_resource == "Models":
|
||||
|
|
23
llama_stack/distribution/ui/page/distribution/vector_dbs.py
Normal file
23
llama_stack/distribution/ui/page/distribution/vector_dbs.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import streamlit as st
|
||||
from modules.api import llama_stack_api
|
||||
|
||||
|
||||
def vector_dbs():
|
||||
st.header("Vector Databases")
|
||||
vector_dbs_info = {
|
||||
v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()
|
||||
}
|
||||
|
||||
if len(vector_dbs_info) > 0:
|
||||
selected_vector_db = st.selectbox(
|
||||
"Select a vector database", list(vector_dbs_info.keys())
|
||||
)
|
||||
st.json(vector_dbs_info[selected_vector_db])
|
||||
else:
|
||||
st.info("No vector databases found")
|
|
@ -29,12 +29,12 @@ def rag_chat_page():
|
|||
if uploaded_files:
|
||||
st.success(f"Successfully uploaded {len(uploaded_files)} files")
|
||||
# Add memory bank name input field
|
||||
memory_bank_name = st.text_input(
|
||||
"Memory Bank Name",
|
||||
value="rag_bank",
|
||||
help="Enter a unique identifier for this memory bank",
|
||||
vector_db_name = st.text_input(
|
||||
"Vector Database Name",
|
||||
value="rag_vector_db",
|
||||
help="Enter a unique identifier for this vector database",
|
||||
)
|
||||
if st.button("Create Memory Bank"):
|
||||
if st.button("Create Vector Database"):
|
||||
documents = [
|
||||
Document(
|
||||
document_id=uploaded_file.name,
|
||||
|
@ -44,37 +44,33 @@ def rag_chat_page():
|
|||
]
|
||||
|
||||
providers = llama_stack_api.client.providers.list()
|
||||
memory_provider = None
|
||||
vector_io_provider = None
|
||||
|
||||
for x in providers:
|
||||
if x.api == "memory":
|
||||
memory_provider = x.provider_id
|
||||
if x.api == "vector_io":
|
||||
vector_io_provider = x.provider_id
|
||||
|
||||
llama_stack_api.client.memory_banks.register(
|
||||
memory_bank_id=memory_bank_name, # Use the user-provided name
|
||||
params={
|
||||
"memory_bank_type": "vector",
|
||||
"embedding_model": "all-MiniLM-L6-v2",
|
||||
"chunk_size_in_tokens": 512,
|
||||
"overlap_size_in_tokens": 64,
|
||||
},
|
||||
provider_id=memory_provider,
|
||||
llama_stack_api.client.vector_dbs.register(
|
||||
vector_db_id=vector_db_name, # Use the user-provided name
|
||||
embedding_dimension=384,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
provider_id=vector_io_provider,
|
||||
)
|
||||
|
||||
# insert documents using the custom bank name
|
||||
llama_stack_api.client.memory.insert(
|
||||
bank_id=memory_bank_name, # Use the user-provided name
|
||||
# insert documents using the custom vector db name
|
||||
llama_stack_api.client.tool_runtime.rag_tool.insert(
|
||||
vector_db_id=vector_db_name, # Use the user-provided name
|
||||
documents=documents,
|
||||
)
|
||||
st.success("Memory bank created successfully!")
|
||||
st.success("Vector database created successfully!")
|
||||
|
||||
st.subheader("Configure Agent")
|
||||
# select memory banks
|
||||
memory_banks = llama_stack_api.client.memory_banks.list()
|
||||
memory_banks = [bank.identifier for bank in memory_banks]
|
||||
selected_memory_banks = st.multiselect(
|
||||
"Select Memory Banks",
|
||||
memory_banks,
|
||||
vector_dbs = llama_stack_api.client.vector_dbs.list()
|
||||
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
|
||||
selected_vector_dbs = st.multiselect(
|
||||
"Select Vector Databases",
|
||||
vector_dbs,
|
||||
)
|
||||
|
||||
available_models = llama_stack_api.client.models.list()
|
||||
|
@ -141,14 +137,14 @@ def rag_chat_page():
|
|||
dict(
|
||||
name="builtin::memory",
|
||||
args={
|
||||
"memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
|
||||
"vector_db_ids": [
|
||||
vector_db_id for vector_db_id in selected_vector_dbs
|
||||
],
|
||||
},
|
||||
)
|
||||
],
|
||||
tool_choice="auto",
|
||||
tool_prompt_format="json",
|
||||
input_shields=[],
|
||||
output_shields=[],
|
||||
enable_session_persistence=False,
|
||||
)
|
||||
|
||||
|
|
|
@ -413,8 +413,8 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
session_info = await self.storage.get_session_info(session_id)
|
||||
|
||||
# if the session has a memory bank id, let the memory tool use it
|
||||
if session_info.memory_bank_id:
|
||||
vector_db_ids.append(session_info.memory_bank_id)
|
||||
if session_info.vector_db_id:
|
||||
vector_db_ids.append(session_info.vector_db_id)
|
||||
|
||||
yield AgentTurnResponseStreamChunk(
|
||||
event=AgentTurnResponseEvent(
|
||||
|
@ -829,7 +829,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
msg = await attachment_message(self.tempdir, url_items)
|
||||
input_messages.append(msg)
|
||||
# Since memory is present, add all the data to the memory bank
|
||||
await self.add_to_session_memory_bank(session_id, documents)
|
||||
await self.add_to_session_vector_db(session_id, documents)
|
||||
elif code_interpreter_tool:
|
||||
# if only code_interpreter is available, we download the URLs to a tempdir
|
||||
# and attach the path to them as a message to inference with the
|
||||
|
@ -838,7 +838,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
input_messages.append(msg)
|
||||
elif memory_tool:
|
||||
# if only memory is available, we load the data from the URLs and content items to the memory bank
|
||||
await self.add_to_session_memory_bank(session_id, documents)
|
||||
await self.add_to_session_vector_db(session_id, documents)
|
||||
else:
|
||||
# if no memory or code_interpreter tool is available,
|
||||
# we try to load the data from the URLs and content items as a message to inference
|
||||
|
@ -848,31 +848,31 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
+ await load_data_from_urls(url_items)
|
||||
)
|
||||
|
||||
async def _ensure_memory_bank(self, session_id: str) -> str:
|
||||
async def _ensure_vector_db(self, session_id: str) -> str:
|
||||
session_info = await self.storage.get_session_info(session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {session_id} not found")
|
||||
|
||||
if session_info.memory_bank_id is None:
|
||||
bank_id = f"memory_bank_{session_id}"
|
||||
if session_info.vector_db_id is None:
|
||||
vector_db_id = f"vector_db_{session_id}"
|
||||
|
||||
# TODO: the semantic for registration is definitely not "creation"
|
||||
# so we need to fix it if we expect the agent to create a new vector db
|
||||
# for each session
|
||||
await self.vector_io_api.register_vector_db(
|
||||
vector_db_id=bank_id,
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
)
|
||||
await self.storage.add_memory_bank_to_session(session_id, bank_id)
|
||||
await self.storage.add_vector_db_to_session(session_id, vector_db_id)
|
||||
else:
|
||||
bank_id = session_info.memory_bank_id
|
||||
vector_db_id = session_info.vector_db_id
|
||||
|
||||
return bank_id
|
||||
return vector_db_id
|
||||
|
||||
async def add_to_session_memory_bank(
|
||||
async def add_to_session_vector_db(
|
||||
self, session_id: str, data: List[Document]
|
||||
) -> None:
|
||||
vector_db_id = await self._ensure_memory_bank(session_id)
|
||||
vector_db_id = await self._ensure_vector_db(session_id)
|
||||
documents = [
|
||||
RAGDocument(
|
||||
document_id=str(uuid.uuid4()),
|
||||
|
|
|
@ -21,7 +21,7 @@ log = logging.getLogger(__name__)
|
|||
class AgentSessionInfo(BaseModel):
|
||||
session_id: str
|
||||
session_name: str
|
||||
memory_bank_id: Optional[str] = None
|
||||
vector_db_id: Optional[str] = None
|
||||
started_at: datetime
|
||||
|
||||
|
||||
|
@ -52,12 +52,12 @@ class AgentPersistence:
|
|||
|
||||
return AgentSessionInfo(**json.loads(value))
|
||||
|
||||
async def add_memory_bank_to_session(self, session_id: str, bank_id: str):
|
||||
async def add_vector_db_to_session(self, session_id: str, vector_db_id: str):
|
||||
session_info = await self.get_session_info(session_id)
|
||||
if session_info is None:
|
||||
raise ValueError(f"Session {session_id} not found")
|
||||
|
||||
session_info.memory_bank_id = bank_id
|
||||
session_info.vector_db_id = vector_db_id
|
||||
await self.kvstore.set(
|
||||
key=f"session:{self.agent_id}:{session_id}",
|
||||
value=session_info.model_dump_json(),
|
||||
|
|
|
@ -29,10 +29,9 @@ from llama_stack.apis.inference import (
|
|||
SamplingParams,
|
||||
ToolChoice,
|
||||
ToolDefinition,
|
||||
ToolPromptFormat,
|
||||
UserMessage,
|
||||
)
|
||||
from llama_stack.apis.memory import MemoryBank
|
||||
from llama_stack.apis.memory_banks import BankParams, VectorMemoryBank
|
||||
from llama_stack.apis.safety import RunShieldResponse
|
||||
from llama_stack.apis.tools import (
|
||||
Tool,
|
||||
|
@ -40,8 +39,9 @@ from llama_stack.apis.tools import (
|
|||
ToolGroup,
|
||||
ToolHost,
|
||||
ToolInvocationResult,
|
||||
ToolPromptFormat,
|
||||
)
|
||||
from llama_stack.apis.vector_io import QueryChunksResponse
|
||||
|
||||
from llama_stack.providers.inline.agents.meta_reference.agent_instance import (
|
||||
MEMORY_QUERY_TOOL,
|
||||
)
|
||||
|
@ -110,68 +110,22 @@ class MockSafetyAPI:
|
|||
return RunShieldResponse(violation=None)
|
||||
|
||||
|
||||
class MockMemoryAPI:
|
||||
class MockVectorIOAPI:
|
||||
def __init__(self):
|
||||
self.memory_banks = {}
|
||||
self.documents = {}
|
||||
self.chunks = {}
|
||||
|
||||
async def create_memory_bank(self, name, config, url=None):
|
||||
bank_id = f"bank_{len(self.memory_banks)}"
|
||||
bank = MemoryBank(bank_id, name, config, url)
|
||||
self.memory_banks[bank_id] = bank
|
||||
self.documents[bank_id] = {}
|
||||
return bank
|
||||
async def insert_chunks(self, vector_db_id, chunks, ttl_seconds=None):
|
||||
for chunk in chunks:
|
||||
metadata = chunk.metadata
|
||||
self.chunks[vector_db_id][metadata["document_id"]] = chunk
|
||||
|
||||
async def list_memory_banks(self):
|
||||
return list(self.memory_banks.values())
|
||||
async def query_chunks(self, vector_db_id, query, params=None):
|
||||
if vector_db_id not in self.chunks:
|
||||
raise ValueError(f"Bank {vector_db_id} not found")
|
||||
|
||||
async def get_memory_bank(self, bank_id):
|
||||
return self.memory_banks.get(bank_id)
|
||||
|
||||
async def drop_memory_bank(self, bank_id):
|
||||
if bank_id in self.memory_banks:
|
||||
del self.memory_banks[bank_id]
|
||||
del self.documents[bank_id]
|
||||
return bank_id
|
||||
|
||||
async def insert_documents(self, bank_id, documents, ttl_seconds=None):
|
||||
if bank_id not in self.documents:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
for doc in documents:
|
||||
self.documents[bank_id][doc.document_id] = doc
|
||||
|
||||
async def update_documents(self, bank_id, documents):
|
||||
if bank_id not in self.documents:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
for doc in documents:
|
||||
if doc.document_id in self.documents[bank_id]:
|
||||
self.documents[bank_id][doc.document_id] = doc
|
||||
|
||||
async def query_documents(self, bank_id, query, params=None):
|
||||
if bank_id not in self.documents:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
# Simple mock implementation: return all documents
|
||||
chunks = [
|
||||
{"content": doc.content, "token_count": 10, "document_id": doc.document_id}
|
||||
for doc in self.documents[bank_id].values()
|
||||
]
|
||||
chunks = list(self.chunks[vector_db_id].values())
|
||||
scores = [1.0] * len(chunks)
|
||||
return {"chunks": chunks, "scores": scores}
|
||||
|
||||
async def get_documents(self, bank_id, document_ids):
|
||||
if bank_id not in self.documents:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
return [
|
||||
self.documents[bank_id][doc_id]
|
||||
for doc_id in document_ids
|
||||
if doc_id in self.documents[bank_id]
|
||||
]
|
||||
|
||||
async def delete_documents(self, bank_id, document_ids):
|
||||
if bank_id not in self.documents:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
for doc_id in document_ids:
|
||||
self.documents[bank_id].pop(doc_id, None)
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
|
||||
class MockToolGroupsAPI:
|
||||
|
@ -241,31 +195,6 @@ class MockToolRuntimeAPI:
|
|||
return ToolInvocationResult(content={"result": "Mock tool result"})
|
||||
|
||||
|
||||
class MockMemoryBanksAPI:
|
||||
async def list_memory_banks(self) -> List[MemoryBank]:
|
||||
return []
|
||||
|
||||
async def get_memory_bank(self, memory_bank_id: str) -> Optional[MemoryBank]:
|
||||
return None
|
||||
|
||||
async def register_memory_bank(
|
||||
self,
|
||||
memory_bank_id: str,
|
||||
params: BankParams,
|
||||
provider_id: Optional[str] = None,
|
||||
provider_memory_bank_id: Optional[str] = None,
|
||||
) -> MemoryBank:
|
||||
return VectorMemoryBank(
|
||||
identifier=memory_bank_id,
|
||||
provider_resource_id=provider_memory_bank_id or memory_bank_id,
|
||||
embedding_model="mock_model",
|
||||
chunk_size_in_tokens=512,
|
||||
)
|
||||
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None:
|
||||
pass
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_inference_api():
|
||||
return MockInferenceAPI()
|
||||
|
@ -277,8 +206,8 @@ def mock_safety_api():
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_memory_api():
|
||||
return MockMemoryAPI()
|
||||
def mock_vector_io_api():
|
||||
return MockVectorIOAPI()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -291,17 +220,11 @@ def mock_tool_runtime_api():
|
|||
return MockToolRuntimeAPI()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_memory_banks_api():
|
||||
return MockMemoryBanksAPI()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def get_agents_impl(
|
||||
mock_inference_api,
|
||||
mock_safety_api,
|
||||
mock_memory_api,
|
||||
mock_memory_banks_api,
|
||||
mock_vector_io_api,
|
||||
mock_tool_runtime_api,
|
||||
mock_tool_groups_api,
|
||||
):
|
||||
|
@ -314,8 +237,7 @@ async def get_agents_impl(
|
|||
),
|
||||
inference_api=mock_inference_api,
|
||||
safety_api=mock_safety_api,
|
||||
memory_api=mock_memory_api,
|
||||
memory_banks_api=mock_memory_banks_api,
|
||||
vector_io_api=mock_vector_io_api,
|
||||
tool_runtime_api=mock_tool_runtime_api,
|
||||
tool_groups_api=mock_tool_groups_api,
|
||||
)
|
||||
|
@ -484,7 +406,7 @@ async def test_chat_agent_tools(
|
|||
toolgroups_for_turn=[
|
||||
AgentToolGroupWithArgs(
|
||||
name=MEMORY_TOOLGROUP,
|
||||
args={"memory_banks": ["test_memory_bank"]},
|
||||
args={"vector_dbs": ["test_vector_db"]},
|
||||
)
|
||||
]
|
||||
)
|
||||
|
|
|
@ -14,8 +14,10 @@ from .config import ChromaInlineImplConfig
|
|||
async def get_provider_impl(
|
||||
config: ChromaInlineImplConfig, deps: Dict[Api, ProviderSpec]
|
||||
):
|
||||
from llama_stack.providers.remote.memory.chroma.chroma import ChromaMemoryAdapter
|
||||
from llama_stack.providers.remote.vector_io.chroma.chroma import (
|
||||
ChromaVectorIOAdapter,
|
||||
)
|
||||
|
||||
impl = ChromaMemoryAdapter(config, deps[Api.inference])
|
||||
impl = ChromaVectorIOAdapter(config, deps[Api.inference])
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
@ -14,8 +14,8 @@ from .config import ChromaRemoteImplConfig
|
|||
async def get_adapter_impl(
|
||||
config: ChromaRemoteImplConfig, deps: Dict[Api, ProviderSpec]
|
||||
):
|
||||
from .chroma import ChromaMemoryAdapter
|
||||
from .chroma import ChromaVectorIOAdapter
|
||||
|
||||
impl = ChromaMemoryAdapter(config, deps[Api.inference])
|
||||
impl = ChromaVectorIOAdapter(config, deps[Api.inference])
|
||||
await impl.initialize()
|
||||
return impl
|
||||
|
|
|
@ -6,25 +6,20 @@
|
|||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import chromadb
|
||||
from numpy.typing import NDArray
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.memory import (
|
||||
Chunk,
|
||||
Memory,
|
||||
MemoryBankDocument,
|
||||
QueryDocumentsResponse,
|
||||
)
|
||||
from llama_stack.apis.memory_banks import MemoryBank, MemoryBankType
|
||||
from llama_stack.providers.datatypes import Api, MemoryBanksProtocolPrivate
|
||||
from llama_stack.providers.inline.memory.chroma import ChromaInlineImplConfig
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.inline.vector_io.chroma import ChromaInlineImplConfig
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
from .config import ChromaRemoteImplConfig
|
||||
|
||||
|
@ -61,7 +56,7 @@ class ChromaIndex(EmbeddingIndex):
|
|||
|
||||
async def query(
|
||||
self, embedding: NDArray, k: int, score_threshold: float
|
||||
) -> QueryDocumentsResponse:
|
||||
) -> QueryChunksResponse:
|
||||
results = await maybe_await(
|
||||
self.collection.query(
|
||||
query_embeddings=[embedding.tolist()],
|
||||
|
@ -85,19 +80,19 @@ class ChromaIndex(EmbeddingIndex):
|
|||
chunks.append(chunk)
|
||||
scores.append(1.0 / float(dist))
|
||||
|
||||
return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def delete(self):
|
||||
await maybe_await(self.client.delete_collection(self.collection.name))
|
||||
|
||||
|
||||
class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
||||
class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||
def __init__(
|
||||
self,
|
||||
config: Union[ChromaRemoteImplConfig, ChromaInlineImplConfig],
|
||||
inference_api: Api.inference,
|
||||
) -> None:
|
||||
log.info(f"Initializing ChromaMemoryAdapter with url: {config}")
|
||||
log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
|
||||
|
@ -123,60 +118,58 @@ class ChromaMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
|||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def register_memory_bank(
|
||||
async def register_vector_db(
|
||||
self,
|
||||
memory_bank: MemoryBank,
|
||||
vector_db: VectorDB,
|
||||
) -> None:
|
||||
assert (
|
||||
memory_bank.memory_bank_type == MemoryBankType.vector.value
|
||||
), f"Only vector banks are supported {memory_bank.memory_bank_type}"
|
||||
|
||||
collection = await maybe_await(
|
||||
self.client.get_or_create_collection(
|
||||
name=memory_bank.identifier,
|
||||
metadata={"bank": memory_bank.model_dump_json()},
|
||||
name=vector_db.identifier,
|
||||
metadata={"vector_db": vector_db.model_dump_json()},
|
||||
)
|
||||
)
|
||||
self.cache[memory_bank.identifier] = BankWithIndex(
|
||||
memory_bank, ChromaIndex(self.client, collection), self.inference_api
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db, ChromaIndex(self.client, collection), self.inference_api
|
||||
)
|
||||
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None:
|
||||
await self.cache[memory_bank_id].index.delete()
|
||||
del self.cache[memory_bank_id]
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
|
||||
async def insert_documents(
|
||||
async def insert_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
documents: List[MemoryBankDocument],
|
||||
ttl_seconds: Optional[int] = None,
|
||||
vector_db_id: str,
|
||||
chunks: List[Chunk],
|
||||
embeddings: NDArray,
|
||||
) -> None:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
|
||||
await index.insert_documents(documents)
|
||||
await index.insert_chunks(chunks, embeddings)
|
||||
|
||||
async def query_documents(
|
||||
async def query_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> QueryDocumentsResponse:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
|
||||
return await index.query_documents(query, params)
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
async def _get_and_cache_bank_index(self, bank_id: str) -> BankWithIndex:
|
||||
if bank_id in self.cache:
|
||||
return self.cache[bank_id]
|
||||
async def _get_and_cache_vector_db_index(
|
||||
self, vector_db_id: str
|
||||
) -> VectorDBWithIndex:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
||||
bank = await self.memory_bank_store.get_memory_bank(bank_id)
|
||||
if not bank:
|
||||
raise ValueError(f"Bank {bank_id} not found in Llama Stack")
|
||||
collection = await maybe_await(self.client.get_collection(bank_id))
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
|
||||
collection = await maybe_await(self.client.get_collection(vector_db_id))
|
||||
if not collection:
|
||||
raise ValueError(f"Bank {bank_id} not found in Chroma")
|
||||
index = BankWithIndex(
|
||||
bank, ChromaIndex(self.client, collection), self.inference_api
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
|
||||
index = VectorDBWithIndex(
|
||||
vector_db, ChromaIndex(self.client, collection), self.inference_api
|
||||
)
|
||||
self.cache[bank_id] = index
|
||||
self.cache[vector_db_id] = index
|
||||
return index
|
||||
|
|
|
@ -12,21 +12,16 @@ from numpy.typing import NDArray
|
|||
from psycopg2 import sql
|
||||
from psycopg2.extras import execute_values, Json
|
||||
|
||||
from pydantic import BaseModel, parse_obj_as
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.memory import (
|
||||
Chunk,
|
||||
Memory,
|
||||
MemoryBankDocument,
|
||||
QueryDocumentsResponse,
|
||||
)
|
||||
from llama_stack.apis.memory_banks import MemoryBank, MemoryBankType, VectorMemoryBank
|
||||
from llama_stack.providers.datatypes import Api, MemoryBanksProtocolPrivate
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
|
||||
from .config import PGVectorConfig
|
||||
|
@ -50,20 +45,20 @@ def upsert_models(cur, keys_models: List[Tuple[str, BaseModel]]):
|
|||
"""
|
||||
)
|
||||
|
||||
values = [(key, Json(model.dict())) for key, model in keys_models]
|
||||
values = [(key, Json(model.model_dump())) for key, model in keys_models]
|
||||
execute_values(cur, query, values, template="(%s, %s)")
|
||||
|
||||
|
||||
def load_models(cur, cls):
|
||||
cur.execute("SELECT key, data FROM metadata_store")
|
||||
rows = cur.fetchall()
|
||||
return [parse_obj_as(cls, row["data"]) for row in rows]
|
||||
return [TypeAdapter(cls).validate_python(row["data"]) for row in rows]
|
||||
|
||||
|
||||
class PGVectorIndex(EmbeddingIndex):
|
||||
def __init__(self, bank: VectorMemoryBank, dimension: int, cursor):
|
||||
def __init__(self, vector_db: VectorDB, dimension: int, cursor):
|
||||
self.cursor = cursor
|
||||
self.table_name = f"vector_store_{bank.identifier}"
|
||||
self.table_name = f"vector_store_{vector_db.identifier}"
|
||||
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
|
@ -85,7 +80,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
values.append(
|
||||
(
|
||||
f"{chunk.document_id}:chunk-{i}",
|
||||
Json(chunk.dict()),
|
||||
Json(chunk.model_dump()),
|
||||
embeddings[i].tolist(),
|
||||
)
|
||||
)
|
||||
|
@ -101,7 +96,7 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
|
||||
async def query(
|
||||
self, embedding: NDArray, k: int, score_threshold: float
|
||||
) -> QueryDocumentsResponse:
|
||||
) -> QueryChunksResponse:
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
SELECT document, embedding <-> %s::vector AS distance
|
||||
|
@ -119,13 +114,13 @@ class PGVectorIndex(EmbeddingIndex):
|
|||
chunks.append(Chunk(**doc))
|
||||
scores.append(1.0 / float(dist))
|
||||
|
||||
return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def delete(self):
|
||||
self.cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
|
||||
|
||||
|
||||
class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
||||
class PGVectorVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||
def __init__(self, config: PGVectorConfig, inference_api: Api.inference) -> None:
|
||||
self.config = config
|
||||
self.inference_api = inference_api
|
||||
|
@ -167,46 +162,45 @@ class PGVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
|||
async def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
async def register_memory_bank(self, memory_bank: MemoryBank) -> None:
|
||||
assert (
|
||||
memory_bank.memory_bank_type == MemoryBankType.vector.value
|
||||
), f"Only vector banks are supported {memory_bank.memory_bank_type}"
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
upsert_models(self.cursor, [(vector_db.identifier, vector_db)])
|
||||
|
||||
upsert_models(self.cursor, [(memory_bank.identifier, memory_bank)])
|
||||
index = PGVectorIndex(memory_bank, memory_bank.embedding_dimension, self.cursor)
|
||||
self.cache[memory_bank.identifier] = BankWithIndex(
|
||||
memory_bank, index, self.inference_api
|
||||
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.cursor)
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db, index, self.inference_api
|
||||
)
|
||||
|
||||
async def unregister_memory_bank(self, memory_bank_id: str) -> None:
|
||||
await self.cache[memory_bank_id].index.delete()
|
||||
del self.cache[memory_bank_id]
|
||||
async def unregister_vector_db(self, vector_db_id: str) -> None:
|
||||
await self.cache[vector_db_id].index.delete()
|
||||
del self.cache[vector_db_id]
|
||||
|
||||
async def insert_documents(
|
||||
async def insert_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
documents: List[MemoryBankDocument],
|
||||
vector_db_id: str,
|
||||
chunks: List[Chunk],
|
||||
ttl_seconds: Optional[int] = None,
|
||||
) -> None:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
await index.insert_documents(documents)
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_documents(
|
||||
async def query_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> QueryDocumentsResponse:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
return await index.query_documents(query, params)
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
return await index.query_chunks(query, params)
|
||||
|
||||
self.inference_api = inference_api
|
||||
async def _get_and_cache_vector_db_index(
|
||||
self, vector_db_id: str
|
||||
) -> VectorDBWithIndex:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
||||
async def _get_and_cache_bank_index(self, bank_id: str) -> BankWithIndex:
|
||||
if bank_id in self.cache:
|
||||
return self.cache[bank_id]
|
||||
|
||||
bank = await self.memory_bank_store.get_memory_bank(bank_id)
|
||||
index = PGVectorIndex(bank, bank.embedding_dimension, self.cursor)
|
||||
self.cache[bank_id] = BankWithIndex(bank, index, self.inference_api)
|
||||
return self.cache[bank_id]
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
index = PGVectorIndex(vector_db, vector_db.embedding_dimension, self.cursor)
|
||||
self.cache[vector_db_id] = VectorDBWithIndex(
|
||||
vector_db, index, self.inference_api
|
||||
)
|
||||
return self.cache[vector_db_id]
|
||||
|
|
|
@ -13,19 +13,14 @@ from qdrant_client import AsyncQdrantClient, models
|
|||
from qdrant_client.models import PointStruct
|
||||
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.memory import (
|
||||
Chunk,
|
||||
Memory,
|
||||
MemoryBankDocument,
|
||||
QueryDocumentsResponse,
|
||||
)
|
||||
from llama_stack.apis.memory_banks import MemoryBank, MemoryBankType
|
||||
from llama_stack.providers.datatypes import Api, MemoryBanksProtocolPrivate
|
||||
from llama_stack.providers.remote.memory.qdrant.config import QdrantConfig
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
from .config import QdrantConfig
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
CHUNK_ID_KEY = "_chunk_id"
|
||||
|
@ -76,7 +71,7 @@ class QdrantIndex(EmbeddingIndex):
|
|||
|
||||
async def query(
|
||||
self, embedding: NDArray, k: int, score_threshold: float
|
||||
) -> QueryDocumentsResponse:
|
||||
) -> QueryChunksResponse:
|
||||
results = (
|
||||
await self.client.query_points(
|
||||
collection_name=self.collection_name,
|
||||
|
@ -101,10 +96,10 @@ class QdrantIndex(EmbeddingIndex):
|
|||
chunks.append(chunk)
|
||||
scores.append(point.score)
|
||||
|
||||
return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
|
||||
class QdrantVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
||||
class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||
def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None:
|
||||
self.config = config
|
||||
self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True))
|
||||
|
@ -117,58 +112,56 @@ class QdrantVectorMemoryAdapter(Memory, MemoryBanksProtocolPrivate):
|
|||
async def shutdown(self) -> None:
|
||||
self.client.close()
|
||||
|
||||
async def register_memory_bank(
|
||||
async def register_vector_db(
|
||||
self,
|
||||
memory_bank: MemoryBank,
|
||||
vector_db: VectorDB,
|
||||
) -> None:
|
||||
assert (
|
||||
memory_bank.memory_bank_type == MemoryBankType.vector
|
||||
), f"Only vector banks are supported {memory_bank.memory_bank_type}"
|
||||
|
||||
index = BankWithIndex(
|
||||
bank=memory_bank,
|
||||
index=QdrantIndex(self.client, memory_bank.identifier),
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=QdrantIndex(self.client, vector_db.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
|
||||
self.cache[memory_bank.identifier] = index
|
||||
self.cache[vector_db.identifier] = index
|
||||
|
||||
async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]:
|
||||
if bank_id in self.cache:
|
||||
return self.cache[bank_id]
|
||||
async def _get_and_cache_vector_db_index(
|
||||
self, vector_db_id: str
|
||||
) -> Optional[VectorDBWithIndex]:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
||||
bank = await self.memory_bank_store.get_memory_bank(bank_id)
|
||||
if not bank:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
index = BankWithIndex(
|
||||
bank=bank,
|
||||
index=QdrantIndex(client=self.client, collection_name=bank_id),
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=QdrantIndex(client=self.client, collection_name=vector_db.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[bank_id] = index
|
||||
self.cache[vector_db_id] = index
|
||||
return index
|
||||
|
||||
async def insert_documents(
|
||||
async def insert_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
documents: List[MemoryBankDocument],
|
||||
vector_db_id: str,
|
||||
chunks: List[Chunk],
|
||||
ttl_seconds: Optional[int] = None,
|
||||
) -> None:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
if not index:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
await index.insert_documents(documents)
|
||||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_documents(
|
||||
async def query_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> QueryDocumentsResponse:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
if not index:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
return await index.query_documents(query, params)
|
||||
return await index.query_chunks(query, params)
|
||||
|
|
|
@ -4,19 +4,22 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.apis.memory import Memory
|
||||
from llama_stack.apis.memory_banks import MemoryBank
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import VectorIO
|
||||
from .config import SampleConfig
|
||||
|
||||
|
||||
class SampleMemoryImpl(Memory):
|
||||
class SampleMemoryImpl(VectorIO):
|
||||
def __init__(self, config: SampleConfig):
|
||||
self.config = config
|
||||
|
||||
async def register_memory_bank(self, memory_bank: MemoryBank) -> None:
|
||||
# these are the memory banks the Llama Stack will use to route requests to this provider
|
||||
async def register_vector_db(self, vector_db: VectorDB) -> None:
|
||||
# these are the vector dbs the Llama Stack will use to route requests to this provider
|
||||
# perform validation here if necessary
|
||||
pass
|
||||
|
||||
async def initialize(self):
|
||||
pass
|
||||
|
||||
async def shutdown(self):
|
||||
pass
|
||||
|
|
|
@ -15,18 +15,13 @@ from weaviate.classes.init import Auth
|
|||
from weaviate.classes.query import Filter
|
||||
|
||||
from llama_stack.apis.common.content_types import InterleavedContent
|
||||
from llama_stack.apis.memory import (
|
||||
Chunk,
|
||||
Memory,
|
||||
MemoryBankDocument,
|
||||
QueryDocumentsResponse,
|
||||
)
|
||||
from llama_stack.apis.memory_banks import MemoryBank, MemoryBankType
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
|
||||
from llama_stack.distribution.request_headers import NeedsRequestProviderData
|
||||
from llama_stack.providers.datatypes import Api, MemoryBanksProtocolPrivate
|
||||
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
BankWithIndex,
|
||||
EmbeddingIndex,
|
||||
VectorDBWithIndex,
|
||||
)
|
||||
|
||||
from .config import WeaviateConfig, WeaviateRequestProviderData
|
||||
|
@ -49,7 +44,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|||
data_objects.append(
|
||||
wvc.data.DataObject(
|
||||
properties={
|
||||
"chunk_content": chunk.json(),
|
||||
"chunk_content": chunk.model_dump_json(),
|
||||
},
|
||||
vector=embeddings[i].tolist(),
|
||||
)
|
||||
|
@ -63,7 +58,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|||
|
||||
async def query(
|
||||
self, embedding: NDArray, k: int, score_threshold: float
|
||||
) -> QueryDocumentsResponse:
|
||||
) -> QueryChunksResponse:
|
||||
collection = self.client.collections.get(self.collection_name)
|
||||
|
||||
results = collection.query.near_vector(
|
||||
|
@ -86,7 +81,7 @@ class WeaviateIndex(EmbeddingIndex):
|
|||
chunks.append(chunk)
|
||||
scores.append(1.0 / doc.metadata.distance)
|
||||
|
||||
return QueryDocumentsResponse(chunks=chunks, scores=scores)
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def delete(self, chunk_ids: List[str]) -> None:
|
||||
collection = self.client.collections.get(self.collection_name)
|
||||
|
@ -96,9 +91,9 @@ class WeaviateIndex(EmbeddingIndex):
|
|||
|
||||
|
||||
class WeaviateMemoryAdapter(
|
||||
Memory,
|
||||
VectorIO,
|
||||
NeedsRequestProviderData,
|
||||
MemoryBanksProtocolPrivate,
|
||||
VectorDBsProtocolPrivate,
|
||||
):
|
||||
def __init__(self, config: WeaviateConfig, inference_api: Api.inference) -> None:
|
||||
self.config = config
|
||||
|
@ -129,20 +124,16 @@ class WeaviateMemoryAdapter(
|
|||
for client in self.client_cache.values():
|
||||
client.close()
|
||||
|
||||
async def register_memory_bank(
|
||||
async def register_vector_db(
|
||||
self,
|
||||
memory_bank: MemoryBank,
|
||||
vector_db: VectorDB,
|
||||
) -> None:
|
||||
assert (
|
||||
memory_bank.memory_bank_type == MemoryBankType.vector.value
|
||||
), f"Only vector banks are supported {memory_bank.memory_bank_type}"
|
||||
|
||||
client = self._get_client()
|
||||
|
||||
# Create collection if it doesn't exist
|
||||
if not client.collections.exists(memory_bank.identifier):
|
||||
if not client.collections.exists(vector_db.identifier):
|
||||
client.collections.create(
|
||||
name=memory_bank.identifier,
|
||||
name=vector_db.identifier,
|
||||
vectorizer_config=wvc.config.Configure.Vectorizer.none(),
|
||||
properties=[
|
||||
wvc.config.Property(
|
||||
|
@ -152,52 +143,54 @@ class WeaviateMemoryAdapter(
|
|||
],
|
||||
)
|
||||
|
||||
self.cache[memory_bank.identifier] = BankWithIndex(
|
||||
memory_bank,
|
||||
WeaviateIndex(client=client, collection_name=memory_bank.identifier),
|
||||
self.cache[vector_db.identifier] = VectorDBWithIndex(
|
||||
vector_db,
|
||||
WeaviateIndex(client=client, collection_name=vector_db.identifier),
|
||||
self.inference_api,
|
||||
)
|
||||
|
||||
async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]:
|
||||
if bank_id in self.cache:
|
||||
return self.cache[bank_id]
|
||||
async def _get_and_cache_vector_db_index(
|
||||
self, vector_db_id: str
|
||||
) -> Optional[VectorDBWithIndex]:
|
||||
if vector_db_id in self.cache:
|
||||
return self.cache[vector_db_id]
|
||||
|
||||
bank = await self.memory_bank_store.get_memory_bank(bank_id)
|
||||
if not bank:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
|
||||
if not vector_db:
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
client = self._get_client()
|
||||
if not client.collections.exists(bank.identifier):
|
||||
raise ValueError(f"Collection with name `{bank.identifier}` not found")
|
||||
if not client.collections.exists(vector_db.identifier):
|
||||
raise ValueError(f"Collection with name `{vector_db.identifier}` not found")
|
||||
|
||||
index = BankWithIndex(
|
||||
bank=bank,
|
||||
index=WeaviateIndex(client=client, collection_name=bank_id),
|
||||
index = VectorDBWithIndex(
|
||||
vector_db=vector_db,
|
||||
index=WeaviateIndex(client=client, collection_name=vector_db.identifier),
|
||||
inference_api=self.inference_api,
|
||||
)
|
||||
self.cache[bank_id] = index
|
||||
self.cache[vector_db_id] = index
|
||||
return index
|
||||
|
||||
async def insert_documents(
|
||||
async def insert_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
documents: List[MemoryBankDocument],
|
||||
vector_db_id: str,
|
||||
chunks: List[Chunk],
|
||||
ttl_seconds: Optional[int] = None,
|
||||
) -> None:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
if not index:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
await index.insert_documents(documents)
|
||||
await index.insert_chunks(chunks)
|
||||
|
||||
async def query_documents(
|
||||
async def query_chunks(
|
||||
self,
|
||||
bank_id: str,
|
||||
vector_db_id: str,
|
||||
query: InterleavedContent,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
) -> QueryDocumentsResponse:
|
||||
index = await self._get_and_cache_bank_index(bank_id)
|
||||
) -> QueryChunksResponse:
|
||||
index = await self._get_and_cache_vector_db_index(vector_db_id)
|
||||
if not index:
|
||||
raise ValueError(f"Bank {bank_id} not found")
|
||||
raise ValueError(f"Vector DB {vector_db_id} not found")
|
||||
|
||||
return await index.query_documents(query, params)
|
||||
return await index.query_chunks(query, params)
|
||||
|
|
|
@ -53,7 +53,7 @@ async def eval_stack(
|
|||
"inference",
|
||||
"agents",
|
||||
"safety",
|
||||
"memory",
|
||||
"vector_io",
|
||||
"tool_runtime",
|
||||
]:
|
||||
fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
|
||||
|
@ -69,7 +69,7 @@ async def eval_stack(
|
|||
Api.scoring,
|
||||
Api.agents,
|
||||
Api.safety,
|
||||
Api.memory,
|
||||
Api.vector_io,
|
||||
Api.tool_runtime,
|
||||
],
|
||||
providers,
|
||||
|
|
|
@ -83,7 +83,7 @@ async def tools_stack(
|
|||
|
||||
providers = {}
|
||||
provider_data = {}
|
||||
for key in ["inference", "memory", "tool_runtime"]:
|
||||
for key in ["inference", "vector_io", "tool_runtime"]:
|
||||
fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
|
||||
providers[key] = fixture.providers
|
||||
if key == "inference":
|
||||
|
@ -117,7 +117,12 @@ async def tools_stack(
|
|||
)
|
||||
|
||||
test_stack = await construct_stack_for_test(
|
||||
[Api.tool_groups, Api.inference, Api.memory, Api.tool_runtime],
|
||||
[
|
||||
Api.tool_groups,
|
||||
Api.inference,
|
||||
Api.vector_io,
|
||||
Api.tool_runtime,
|
||||
],
|
||||
providers,
|
||||
provider_data,
|
||||
models=models,
|
||||
|
|
|
@ -8,10 +8,7 @@ import os
|
|||
|
||||
import pytest
|
||||
|
||||
from llama_stack.apis.inference import UserMessage
|
||||
from llama_stack.apis.memory import MemoryBankDocument
|
||||
from llama_stack.apis.memory_banks import VectorMemoryBankParams
|
||||
from llama_stack.apis.tools import ToolInvocationResult
|
||||
from llama_stack.apis.tools import RAGDocument, RAGQueryResult, ToolInvocationResult
|
||||
from llama_stack.providers.datatypes import Api
|
||||
|
||||
|
||||
|
@ -36,7 +33,7 @@ def sample_documents():
|
|||
"lora_finetune.rst",
|
||||
]
|
||||
return [
|
||||
MemoryBankDocument(
|
||||
RAGDocument(
|
||||
document_id=f"num-{i}",
|
||||
content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
|
||||
mime_type="text/plain",
|
||||
|
@ -57,7 +54,7 @@ class TestTools:
|
|||
|
||||
# Execute the tool
|
||||
response = await tools_impl.invoke_tool(
|
||||
tool_name="web_search", args={"query": sample_search_query}
|
||||
tool_name="web_search", kwargs={"query": sample_search_query}
|
||||
)
|
||||
|
||||
# Verify the response
|
||||
|
@ -75,7 +72,7 @@ class TestTools:
|
|||
tools_impl = tools_stack.impls[Api.tool_runtime]
|
||||
|
||||
response = await tools_impl.invoke_tool(
|
||||
tool_name="wolfram_alpha", args={"query": sample_wolfram_alpha_query}
|
||||
tool_name="wolfram_alpha", kwargs={"query": sample_wolfram_alpha_query}
|
||||
)
|
||||
|
||||
# Verify the response
|
||||
|
@ -85,43 +82,33 @@ class TestTools:
|
|||
assert isinstance(response.content, str)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_memory_tool(self, tools_stack, sample_documents):
|
||||
async def test_rag_tool(self, tools_stack, sample_documents):
|
||||
"""Test the memory tool functionality."""
|
||||
memory_banks_impl = tools_stack.impls[Api.memory_banks]
|
||||
memory_impl = tools_stack.impls[Api.memory]
|
||||
vector_dbs_impl = tools_stack.impls[Api.vector_dbs]
|
||||
tools_impl = tools_stack.impls[Api.tool_runtime]
|
||||
|
||||
# Register memory bank
|
||||
await memory_banks_impl.register_memory_bank(
|
||||
memory_bank_id="test_bank",
|
||||
params=VectorMemoryBankParams(
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
chunk_size_in_tokens=512,
|
||||
overlap_size_in_tokens=64,
|
||||
),
|
||||
await vector_dbs_impl.register(
|
||||
vector_db_id="test_bank",
|
||||
embedding_model="all-MiniLM-L6-v2",
|
||||
embedding_dimension=384,
|
||||
provider_id="faiss",
|
||||
)
|
||||
|
||||
# Insert documents into memory
|
||||
await memory_impl.insert_documents(
|
||||
bank_id="test_bank",
|
||||
await tools_impl.rag_tool.insert_documents(
|
||||
documents=sample_documents,
|
||||
vector_db_id="test_bank",
|
||||
chunk_size_in_tokens=512,
|
||||
)
|
||||
|
||||
# Execute the memory tool
|
||||
response = await tools_impl.invoke_tool(
|
||||
tool_name="memory",
|
||||
args={
|
||||
"messages": [
|
||||
UserMessage(
|
||||
content="What are the main topics covered in the documentation?",
|
||||
)
|
||||
],
|
||||
"memory_bank_ids": ["test_bank"],
|
||||
},
|
||||
response = await tools_impl.rag_tool.query_context(
|
||||
content="What are the main topics covered in the documentation?",
|
||||
vector_db_ids=["test_bank"],
|
||||
)
|
||||
|
||||
# Verify the response
|
||||
assert isinstance(response, ToolInvocationResult)
|
||||
assert isinstance(response, RAGQueryResult)
|
||||
assert response.content is not None
|
||||
assert len(response.content) > 0
|
||||
|
|
|
@ -10,7 +10,7 @@ from llama_models.sku_list import all_registered_models
|
|||
|
||||
from llama_stack.apis.models import ModelInput
|
||||
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -18,7 +18,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::bedrock"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["remote::bedrock"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -34,7 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
],
|
||||
}
|
||||
name = "bedrock"
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -78,7 +78,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
run_configs={
|
||||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=default_models,
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::bedrock
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -5,17 +5,17 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -104,7 +104,7 @@ models:
|
|||
provider_model_id: meta.llama3-1-405b-instruct-v1:0
|
||||
model_type: llm
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -6,7 +6,7 @@ distribution_spec:
|
|||
- remote::cerebras
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -13,7 +13,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupIn
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
|
||||
from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
@ -23,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
providers = {
|
||||
"inference": ["remote::cerebras"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
|
@ -68,7 +68,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"embedding_dimension": 384,
|
||||
},
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_shields=[],
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: cerebras
|
||||
|
@ -24,7 +24,7 @@ providers:
|
|||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -106,7 +106,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -60,7 +60,7 @@ providers:
|
|||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -82,7 +82,7 @@ metadata_store:
|
|||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
models: []
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::fireworks
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::fireworks"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -107,7 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||
|
@ -119,7 +119,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
inference_provider,
|
||||
embedding_provider,
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: fireworks
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -161,7 +161,7 @@ shields:
|
|||
provider_id: llama-guard-vision
|
||||
- shield_id: CodeScanner
|
||||
provider_id: code-scanner
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: fireworks
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -150,7 +150,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::hf::endpoint
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::hf::endpoint"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -97,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -115,7 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
),
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-endpoint
|
||||
|
@ -25,7 +25,7 @@ providers:
|
|||
config:
|
||||
endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME}
|
||||
api_token: ${env.HF_API_TOKEN}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -113,7 +113,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-endpoint
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -103,7 +103,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::hf::serverless
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::hf::serverless"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -116,7 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
),
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-serverless
|
||||
|
@ -25,7 +25,7 @@ providers:
|
|||
config:
|
||||
huggingface_repo: ${env.SAFETY_MODEL}
|
||||
api_token: ${env.HF_API_TOKEN}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -113,7 +113,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: hf-serverless
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -103,7 +103,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- inline::meta-reference
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -19,14 +19,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["inline::meta-reference"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -103,7 +103,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -122,7 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
),
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -27,7 +27,7 @@ providers:
|
|||
model: ${env.SAFETY_MODEL}
|
||||
max_seq_len: 4096
|
||||
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -115,7 +115,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -21,7 +21,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -104,7 +104,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- inline::meta-reference-quantized
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -14,14 +14,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["inline::meta-reference-quantized"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -64,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: meta-reference-inference
|
||||
|
@ -23,7 +23,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -106,7 +106,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::nvidia
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
|
|
|
@ -17,7 +17,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::nvidia"],
|
||||
"memory": ["inline::faiss"],
|
||||
"vector_io": ["inline::faiss"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: nvidia
|
||||
|
@ -17,7 +17,7 @@ providers:
|
|||
config:
|
||||
url: https://integrate.api.nvidia.com
|
||||
api_key: ${env.NVIDIA_API_KEY}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -136,7 +136,7 @@ models:
|
|||
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
||||
model_type: llm
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::ollama
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::ollama"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -109,7 +109,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
inference_provider,
|
||||
embedding_provider,
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: ollama
|
||||
|
@ -19,7 +19,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -110,7 +110,7 @@ shields:
|
|||
provider_id: llama-guard
|
||||
- shield_id: CodeScanner
|
||||
provider_id: code-scanner
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: ollama
|
||||
|
@ -19,7 +19,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -99,7 +99,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::vllm
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm-inference
|
||||
|
@ -27,7 +27,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -115,7 +115,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm-inference
|
||||
|
@ -21,7 +21,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -104,7 +104,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::vllm"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
|
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
embedding_provider,
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- remote::tgi
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: tgi-inference
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_SAFETY_URL}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -103,7 +103,7 @@ models:
|
|||
model_type: llm
|
||||
shields:
|
||||
- shield_id: ${env.SAFETY_MODEL}
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: tgi-inference
|
||||
|
@ -19,7 +19,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -102,7 +102,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::tgi"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::sentence-transformers",
|
||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -101,7 +101,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
),
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[
|
||||
inference_model,
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: together
|
||||
|
@ -20,7 +20,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -156,7 +156,7 @@ shields:
|
|||
provider_id: llama-guard-vision
|
||||
- shield_id: CodeScanner
|
||||
provider_id: code-scanner
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- vector_io
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: together
|
||||
|
@ -145,6 +145,7 @@ models:
|
|||
model_type: embedding
|
||||
shields:
|
||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::together"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="remote::together",
|
||||
config=TogetherImplConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
@ -117,7 +117,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
inference_provider,
|
||||
embedding_provider,
|
||||
],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
|||
providers:
|
||||
inference:
|
||||
- inline::vllm
|
||||
memory:
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
|||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- memory
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm
|
||||
|
@ -23,7 +23,7 @@ providers:
|
|||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
memory:
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
|
@ -106,7 +106,7 @@ models:
|
|||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
memory_banks: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
eval_tasks: []
|
||||
|
|
|
@ -10,7 +10,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
|
@ -21,7 +21,7 @@ from llama_stack.templates.template import (
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["inline::vllm"],
|
||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
|
@ -43,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
provider_type="inline::vllm",
|
||||
config=VLLMConfig.sample_run_config(),
|
||||
)
|
||||
memory_provider = Provider(
|
||||
vector_io_provider = Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||
|
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"run.yaml": RunConfigSettings(
|
||||
provider_overrides={
|
||||
"inference": [inference_provider, embedding_provider],
|
||||
"memory": [memory_provider],
|
||||
"vector_io": [vector_io_provider],
|
||||
},
|
||||
default_models=[inference_model, embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue