Update templates

Ashwin Bharambe 2025-01-21 22:12:34 -08:00
parent 5605917361
commit 33ea91364e
68 changed files with 272 additions and 281 deletions

@@ -8,11 +8,11 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::nvidia` |
-| memory | `inline::faiss` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss` |
 ### Environment Variables

@@ -15,11 +15,11 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::bedrock` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `remote::bedrock` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |

@@ -8,11 +8,11 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::cerebras` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 ### Environment Variables

@@ -18,11 +18,11 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::fireworks` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 ### Environment Variables

@@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `inline::meta-reference` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.

@@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `inline::meta-reference-quantized` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.

@@ -18,11 +18,11 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::ollama` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
 ### Environment Variables

@@ -17,11 +17,11 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::vllm` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.

@@ -19,11 +19,11 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::tgi` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.

@@ -18,11 +18,11 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | datasetio | `remote::huggingface`, `inline::localfs` |
 | eval | `inline::meta-reference` |
 | inference | `remote::together` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
 | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 ### Environment Variables

@@ -88,7 +88,7 @@ class MemoryRetrievalStep(StepCommon):
     step_type: Literal[StepType.memory_retrieval.value] = (
         StepType.memory_retrieval.value
     )
-    memory_bank_ids: List[str]
+    vector_db_ids: str
     inserted_context: InterleavedContent

@@ -208,7 +208,7 @@ class EventLogger:
         ):
             details = event.payload.step_details
             inserted_context = interleaved_content_as_str(details.inserted_context)
-            content = f"fetched {len(inserted_context)} bytes from {details.memory_bank_ids}"
+            content = f"fetched {len(inserted_context)} bytes from {details.vector_db_ids}"
             yield (
                 event,
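Taken together, the two hunks above change both the retrieval step payload and the log line that renders it: the step now carries `vector_db_ids: str` instead of `memory_bank_ids: List[str]`, and the event logger interpolates that field directly. A minimal sketch of the resulting behaviour, using a hypothetical stand-in dataclass in place of the real agents-API `MemoryRetrievalStep`:

```python
# Sketch only; FakeMemoryRetrievalDetails is a stand-in for the step details
# object the agents API produces after this change.
from dataclasses import dataclass


@dataclass
class FakeMemoryRetrievalDetails:
    vector_db_ids: str  # was memory_bank_ids: List[str] before this commit
    inserted_context: str  # already flattened to a string by the logger


details = FakeMemoryRetrievalDetails(
    vector_db_ids="my_documents",
    inserted_context="...retrieved chunks...",
)

# Mirrors the f-string in the EventLogger hunk above.
content = f"fetched {len(details.inserted_context)} bytes from {details.vector_db_ids}"
print(content)  # fetched 22 bytes from my_documents
```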

@@ -37,5 +37,5 @@ class Resource(BaseModel):
     provider_id: str = Field(description="ID of the provider that owns this resource")
     type: ResourceType = Field(
-        description="Type of resource (e.g. 'model', 'shield', 'memory_bank', etc.)"
+        description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)"
     )

@@ -9,7 +9,7 @@ import os
 import pytest
 import pytest_asyncio
 from llama_stack.apis.inference import Model
-from llama_stack.apis.memory_banks import VectorMemoryBank
+from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.distribution.store.registry import (
     CachedDiskDistributionRegistry,
@@ -42,13 +42,12 @@ async def cached_registry(config):
 @pytest.fixture
-def sample_bank():
-    return VectorMemoryBank(
-        identifier="test_bank",
+def sample_vector_db():
+    return VectorDB(
+        identifier="test_vector_db",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=512,
-        overlap_size_in_tokens=64,
-        provider_resource_id="test_bank",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db",
         provider_id="test-provider",
     )
@@ -70,19 +69,17 @@ async def test_registry_initialization(registry):
 @pytest.mark.asyncio
-async def test_basic_registration(registry, sample_bank, sample_model):
-    print(f"Registering {sample_bank}")
-    await registry.register(sample_bank)
+async def test_basic_registration(registry, sample_vector_db, sample_model):
+    print(f"Registering {sample_vector_db}")
+    await registry.register(sample_vector_db)
     print(f"Registering {sample_model}")
     await registry.register(sample_model)
-    print("Getting bank")
-    result_bank = await registry.get("memory_bank", "test_bank")
-    assert result_bank is not None
-    assert result_bank.identifier == sample_bank.identifier
-    assert result_bank.embedding_model == sample_bank.embedding_model
-    assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
-    assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
-    assert result_bank.provider_id == sample_bank.provider_id
+    print("Getting vector_db")
+    result_vector_db = await registry.get("vector_db", "test_vector_db")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == sample_vector_db.identifier
+    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
+    assert result_vector_db.provider_id == sample_vector_db.provider_id
     result_model = await registry.get("model", "test_model")
     assert result_model is not None
@@ -91,24 +88,23 @@ async def test_basic_registration(registry, sample_bank, sample_model):
 @pytest.mark.asyncio
-async def test_cached_registry_initialization(config, sample_bank, sample_model):
+async def test_cached_registry_initialization(config, sample_vector_db, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(await kvstore_impl(config))
     await disk_registry.initialize()
-    await disk_registry.register(sample_bank)
+    await disk_registry.register(sample_vector_db)
     await disk_registry.register(sample_model)
     # Test cached version loads from disk
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    result_bank = await cached_registry.get("memory_bank", "test_bank")
-    assert result_bank is not None
-    assert result_bank.identifier == sample_bank.identifier
-    assert result_bank.embedding_model == sample_bank.embedding_model
-    assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
-    assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
-    assert result_bank.provider_id == sample_bank.provider_id
+    result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == sample_vector_db.identifier
+    assert result_vector_db.embedding_model == sample_vector_db.embedding_model
+    assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
+    assert result_vector_db.provider_id == sample_vector_db.provider_id
 @pytest.mark.asyncio
@@ -116,29 +112,28 @@ async def test_cached_registry_updates(config):
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    new_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    new_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=256,
-        overlap_size_in_tokens=32,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",
     )
-    await cached_registry.register(new_bank)
+    await cached_registry.register(new_vector_db)
     # Verify in cache
-    result_bank = await cached_registry.get("memory_bank", "test_bank_2")
-    assert result_bank is not None
-    assert result_bank.identifier == new_bank.identifier
-    assert result_bank.provider_id == new_bank.provider_id
+    result_vector_db = await cached_registry.get("vector_db", "test_vector_db_2")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == new_vector_db.identifier
+    assert result_vector_db.provider_id == new_vector_db.provider_id
     # Verify persisted to disk
     new_registry = DiskDistributionRegistry(await kvstore_impl(config))
     await new_registry.initialize()
-    result_bank = await new_registry.get("memory_bank", "test_bank_2")
-    assert result_bank is not None
-    assert result_bank.identifier == new_bank.identifier
-    assert result_bank.provider_id == new_bank.provider_id
+    result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
+    assert result_vector_db is not None
+    assert result_vector_db.identifier == new_vector_db.identifier
+    assert result_vector_db.provider_id == new_vector_db.provider_id
 @pytest.mark.asyncio
@@ -146,30 +141,28 @@ async def test_duplicate_provider_registration(config):
     cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
     await cached_registry.initialize()
-    original_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    original_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="all-MiniLM-L6-v2",
-        chunk_size_in_tokens=256,
-        overlap_size_in_tokens=32,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",
     )
-    await cached_registry.register(original_bank)
-    duplicate_bank = VectorMemoryBank(
-        identifier="test_bank_2",
+    await cached_registry.register(original_vector_db)
+    duplicate_vector_db = VectorDB(
+        identifier="test_vector_db_2",
         embedding_model="different-model",
-        chunk_size_in_tokens=128,
-        overlap_size_in_tokens=16,
-        provider_resource_id="test_bank_2",
+        embedding_dimension=384,
+        provider_resource_id="test_vector_db_2",
         provider_id="baz",  # Same provider_id
     )
-    await cached_registry.register(duplicate_bank)
-    result = await cached_registry.get("memory_bank", "test_bank_2")
+    await cached_registry.register(duplicate_vector_db)
+    result = await cached_registry.get("vector_db", "test_vector_db_2")
     assert result is not None
     assert (
-        result.embedding_model == original_bank.embedding_model
+        result.embedding_model == original_vector_db.embedding_model
     )  # Original values preserved
@@ -179,36 +172,35 @@ async def test_get_all_objects(config):
     await cached_registry.initialize()
     # Create multiple test banks
-    test_banks = [
-        VectorMemoryBank(
-            identifier=f"test_bank_{i}",
+    test_vector_dbs = [
+        VectorDB(
+            identifier=f"test_vector_db_{i}",
             embedding_model="all-MiniLM-L6-v2",
-            chunk_size_in_tokens=256,
-            overlap_size_in_tokens=32,
-            provider_resource_id=f"test_bank_{i}",
+            embedding_dimension=384,
+            provider_resource_id=f"test_vector_db_{i}",
            provider_id=f"provider_{i}",
         )
         for i in range(3)
     ]
-    # Register all banks
-    for bank in test_banks:
-        await cached_registry.register(bank)
+    # Register all vector_dbs
+    for vector_db in test_vector_dbs:
+        await cached_registry.register(vector_db)
     # Test get_all retrieval
     all_results = await cached_registry.get_all()
     assert len(all_results) == 3
-    # Verify each bank was stored correctly
-    for original_bank in test_banks:
-        matching_banks = [
-            b for b in all_results if b.identifier == original_bank.identifier
+    # Verify each vector_db was stored correctly
+    for original_vector_db in test_vector_dbs:
+        matching_vector_dbs = [
+            v for v in all_results if v.identifier == original_vector_db.identifier
         ]
-        assert len(matching_banks) == 1
-        stored_bank = matching_banks[0]
-        assert stored_bank.embedding_model == original_bank.embedding_model
-        assert stored_bank.provider_id == original_bank.provider_id
-        assert stored_bank.chunk_size_in_tokens == original_bank.chunk_size_in_tokens
+        assert len(matching_vector_dbs) == 1
+        stored_vector_db = matching_vector_dbs[0]
+        assert stored_vector_db.embedding_model == original_vector_db.embedding_model
+        assert stored_vector_db.provider_id == original_vector_db.provider_id
         assert (
-            stored_bank.overlap_size_in_tokens == original_bank.overlap_size_in_tokens
+            stored_vector_db.embedding_dimension
+            == original_vector_db.embedding_dimension
        )
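For reference, the field mapping behind all of the fixture changes above: `VectorMemoryBank(chunk_size_in_tokens=..., overlap_size_in_tokens=...)` becomes `VectorDB(embedding_dimension=...)`, and registry lookups switch from the `"memory_bank"` key to `"vector_db"`. A minimal sketch of the new registration flow, using only the calls these tests exercise (the registry object is assumed to be a `DiskDistributionRegistry` or `CachedDiskDistributionRegistry` that has already been initialized, exactly as the fixtures build it):

```python
# Sketch only; mirrors the calls exercised by the tests above.
from llama_stack.apis.vector_dbs import VectorDB


async def register_and_fetch(registry) -> VectorDB:
    vector_db = VectorDB(
        identifier="docs",
        embedding_model="all-MiniLM-L6-v2",
        embedding_dimension=384,  # replaces the old chunk/overlap sizing fields
        provider_resource_id="docs",
        provider_id="faiss",
    )
    await registry.register(vector_db)

    # Lookups are now keyed by the "vector_db" resource type instead of "memory_bank".
    return await registry.get("vector_db", "docs")
```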

@@ -1,23 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import streamlit as st
-from modules.api import llama_stack_api
-
-
-def memory_banks():
-    st.header("Memory Banks")
-
-    memory_banks_info = {
-        m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
-    }
-    if len(memory_banks_info) > 0:
-        selected_memory_bank = st.selectbox(
-            "Select a memory bank", list(memory_banks_info.keys())
-        )
-        st.json(memory_banks_info[selected_memory_bank])
-    else:
-        st.info("No memory banks found")

@@ -6,10 +6,10 @@
 from page.distribution.datasets import datasets
 from page.distribution.eval_tasks import eval_tasks
-from page.distribution.memory_banks import memory_banks
 from page.distribution.models import models
 from page.distribution.scoring_functions import scoring_functions
 from page.distribution.shields import shields
+from page.distribution.vector_dbs import vector_dbs
 from streamlit_option_menu import option_menu
@@ -17,7 +17,7 @@ from streamlit_option_menu import option_menu
 def resources_page():
     options = [
         "Models",
-        "Memory Banks",
+        "Vector Databases",
         "Shields",
         "Scoring Functions",
         "Datasets",
@@ -37,8 +37,8 @@ def resources_page():
     )
     if selected_resource == "Eval Tasks":
         eval_tasks()
-    elif selected_resource == "Memory Banks":
-        memory_banks()
+    elif selected_resource == "Vector Databases":
+        vector_dbs()
     elif selected_resource == "Datasets":
         datasets()
     elif selected_resource == "Models":

@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import streamlit as st
+from modules.api import llama_stack_api
+
+
+def vector_dbs():
+    st.header("Vector Databases")
+
+    vector_dbs_info = {
+        v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()
+    }
+    if len(vector_dbs_info) > 0:
+        selected_vector_db = st.selectbox(
+            "Select a vector database", list(vector_dbs_info.keys())
+        )
+        st.json(vector_dbs_info[selected_vector_db])
+    else:
+        st.info("No vector databases found")

@@ -29,12 +29,12 @@ def rag_chat_page():
     if uploaded_files:
         st.success(f"Successfully uploaded {len(uploaded_files)} files")
         # Add memory bank name input field
-        memory_bank_name = st.text_input(
-            "Memory Bank Name",
-            value="rag_bank",
-            help="Enter a unique identifier for this memory bank",
+        vector_db_name = st.text_input(
+            "Vector Database Name",
+            value="rag_vector_db",
+            help="Enter a unique identifier for this vector database",
         )
-        if st.button("Create Memory Bank"):
+        if st.button("Create Vector Database"):
            documents = [
                Document(
                    document_id=uploaded_file.name,
@@ -44,37 +44,33 @@ def rag_chat_page():
            ]
            providers = llama_stack_api.client.providers.list()
-            memory_provider = None
+            vector_io_provider = None
            for x in providers:
-                if x.api == "memory":
-                    memory_provider = x.provider_id
+                if x.api == "vector_io":
+                    vector_io_provider = x.provider_id
-            llama_stack_api.client.memory_banks.register(
-                memory_bank_id=memory_bank_name,  # Use the user-provided name
-                params={
-                    "memory_bank_type": "vector",
-                    "embedding_model": "all-MiniLM-L6-v2",
-                    "chunk_size_in_tokens": 512,
-                    "overlap_size_in_tokens": 64,
-                },
-                provider_id=memory_provider,
+            llama_stack_api.client.vector_dbs.register(
+                vector_db_id=vector_db_name,  # Use the user-provided name
+                embedding_dimension=384,
+                embedding_model="all-MiniLM-L6-v2",
+                provider_id=vector_io_provider,
            )
-            # insert documents using the custom bank name
-            llama_stack_api.client.memory.insert(
-                bank_id=memory_bank_name,  # Use the user-provided name
+            # insert documents using the custom vector db name
+            llama_stack_api.client.tool_runtime.rag_tool.insert(
+                vector_db_id=vector_db_name,  # Use the user-provided name
                documents=documents,
            )
-            st.success("Memory bank created successfully!")
+            st.success("Vector database created successfully!")
     st.subheader("Configure Agent")
     # select memory banks
-    memory_banks = llama_stack_api.client.memory_banks.list()
-    memory_banks = [bank.identifier for bank in memory_banks]
-    selected_memory_banks = st.multiselect(
-        "Select Memory Banks",
-        memory_banks,
+    vector_dbs = llama_stack_api.client.vector_dbs.list()
+    vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
+    selected_vector_dbs = st.multiselect(
+        "Select Vector Databases",
+        vector_dbs,
     )
     available_models = llama_stack_api.client.models.list()
@@ -141,14 +137,14 @@ def rag_chat_page():
            dict(
                name="builtin::memory",
                args={
-                    "memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
+                    "vector_db_ids": [
+                        vector_db_id for vector_db_id in selected_vector_dbs
+                    ],
                },
            )
        ],
        tool_choice="auto",
        tool_prompt_format="json",
-        input_shields=[],
-        output_shields=[],
        enable_session_persistence=False,
    )
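Pulled out of the hunks above, the playground's new end-to-end flow is: find a `vector_io` provider, register a vector database against it, and route document ingestion through the RAG tool runtime instead of the old `memory` API. A condensed sketch, assuming the same `llama_stack_api.client` wrapper (here just `client`) and the `documents` list the page already builds from uploaded files:

```python
# Condensed sketch of the flow shown in this diff, not a drop-in replacement for
# the page; `client` and `documents` are assumed from the surrounding code.
vector_db_name = "rag_vector_db"

vector_io_provider = None
for p in client.providers.list():
    if p.api == "vector_io":  # the provider API was renamed from "memory"
        vector_io_provider = p.provider_id

client.vector_dbs.register(  # replaces client.memory_banks.register(params={...})
    vector_db_id=vector_db_name,
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
    provider_id=vector_io_provider,
)

client.tool_runtime.rag_tool.insert(  # replaces client.memory.insert(bank_id=...)
    vector_db_id=vector_db_name,
    documents=documents,
)

# The agent's builtin::memory tool now takes vector_db_ids instead of memory_bank_ids.
memory_tool = dict(name="builtin::memory", args={"vector_db_ids": [vector_db_name]})
```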

@@ -14,8 +14,10 @@ from .config import ChromaInlineImplConfig
 async def get_provider_impl(
     config: ChromaInlineImplConfig, deps: Dict[Api, ProviderSpec]
 ):
-    from llama_stack.providers.remote.memory.chroma.chroma import ChromaMemoryAdapter
+    from llama_stack.providers.remote.vector_io.chroma.chroma import (
+        ChromaVectorIOAdapter,
+    )
 
-    impl = ChromaMemoryAdapter(config, deps[Api.inference])
+    impl = ChromaVectorIOAdapter(config, deps[Api.inference])
     await impl.initialize()
     return impl

@@ -14,8 +14,8 @@ from .config import ChromaRemoteImplConfig
 async def get_adapter_impl(
     config: ChromaRemoteImplConfig, deps: Dict[Api, ProviderSpec]
 ):
-    from .chroma import ChromaMemoryAdapter
+    from .chroma import ChromaVectorIOAdapter
 
-    impl = ChromaMemoryAdapter(config, deps[Api.inference])
+    impl = ChromaVectorIOAdapter(config, deps[Api.inference])
     await impl.initialize()
     return impl

@@ -86,13 +86,13 @@ class ChromaIndex(EmbeddingIndex):
         await maybe_await(self.client.delete_collection(self.collection.name))
 
 
-class ChromaMemoryAdapter(VectorIO, VectorDBsProtocolPrivate):
+class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     def __init__(
         self,
         config: Union[ChromaRemoteImplConfig, ChromaInlineImplConfig],
         inference_api: Api.inference,
     ) -> None:
-        log.info(f"Initializing ChromaMemoryAdapter with url: {config}")
+        log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
         self.config = config
         self.inference_api = inference_api

@@ -10,7 +10,7 @@ from llama_models.sku_list import all_registered_models
 from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -18,7 +18,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::bedrock"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["remote::bedrock"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -34,7 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
         ],
     }
     name = "bedrock"
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -78,7 +78,7 @@ def get_distribution_template() -> DistributionTemplate:
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=default_models,
                 default_tool_groups=default_tool_groups,
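The same three-line pattern repeats in every distribution template touched by this commit: the providers dict advertises `vector_io` instead of `memory`, the faiss `Provider` local is renamed, and the run-config override is keyed on `vector_io`. A compressed restatement of the bedrock template change above, as a sketch rather than the full template (the real template also passes its computed `default_models` and `default_tool_groups` into `RunConfigSettings`):

```python
# Sketch only; names come straight from the hunks above.
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.templates.template import RunConfigSettings

name = "bedrock"
providers = {
    "inference": ["remote::bedrock"],
    "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],  # was "memory"
}

vector_io_provider = Provider(  # was memory_provider
    provider_id="faiss",
    provider_type="inline::faiss",
    config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)

run_config = RunConfigSettings(
    provider_overrides={"vector_io": [vector_io_provider]},  # was {"memory": [...]}
)
```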

@@ -4,7 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::bedrock
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector

@@ -5,17 +5,17 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: bedrock
     provider_type: remote::bedrock
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -104,7 +104,7 @@ models:
   provider_model_id: meta.llama3-1-405b-instruct-v1:0
   model_type: llm
 shields: []
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -6,7 +6,7 @@ distribution_spec:
     - remote::cerebras
     safety:
     - inline::llama-guard
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector

@@ -13,7 +13,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupIn
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -23,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::cerebras"],
         "safety": ["inline::llama-guard"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "agents": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
@@ -68,7 +68,7 @@ def get_distribution_template() -> DistributionTemplate:
             "embedding_dimension": 384,
         },
     )
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=default_models + [embedding_model],
                 default_shields=[],

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: cerebras
@@ -24,7 +24,7 @@ providers:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -106,7 +106,7 @@ models:
   provider_id: sentence-transformers
   model_type: embedding
 shields: []
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -60,7 +60,7 @@ providers:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -82,7 +82,7 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models: []
 shields: []
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -4,7 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::fireworks
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector

@@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::fireworks"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -107,7 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=default_models + [embedding_model],
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
@@ -119,7 +119,7 @@ def get_distribution_template() -> DistributionTemplate:
                         inference_provider,
                         embedding_provider,
                     ],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                     "safety": [
                         Provider(
                             provider_id="llama-guard",

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: fireworks
@@ -20,7 +20,7 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -161,7 +161,7 @@ shields:
   provider_id: llama-guard-vision
 - shield_id: CodeScanner
   provider_id: code-scanner
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: fireworks
@@ -20,7 +20,7 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -150,7 +150,7 @@ models:
   model_type: embedding
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -4,7 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::hf::endpoint
-    memory:
+    vector_io:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector

@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::hf::endpoint"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -97,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=[inference_model, embedding_model],
                 default_tool_groups=default_tool_groups,
@@ -115,7 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
                            ),
                        ),
                    ],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                },
                default_models=[
                    inference_model,

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: hf-endpoint
@@ -25,7 +25,7 @@ providers:
     config:
       endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME}
      api_token: ${env.HF_API_TOKEN}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -113,7 +113,7 @@ models:
   model_type: embedding
 shields:
 - shield_id: ${env.SAFETY_MODEL}
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: hf-endpoint
@@ -20,7 +20,7 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -103,7 +103,7 @@ models:
   provider_id: sentence-transformers
   model_type: embedding
 shields: []
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -4,7 +4,7 @@ distribution_spec:
   providers:
     inference:
     - remote::hf::serverless
-    memory:
+    vector_io:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector

@@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::hf::serverless"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=[inference_model, embedding_model],
                 default_tool_groups=default_tool_groups,
@@ -116,7 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
                            ),
                        ),
                    ],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                },
                default_models=[
                    inference_model,

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: hf-serverless
@@ -25,7 +25,7 @@ providers:
     config:
      huggingface_repo: ${env.SAFETY_MODEL}
      api_token: ${env.HF_API_TOKEN}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -113,7 +113,7 @@ models:
   model_type: embedding
 shields:
 - shield_id: ${env.SAFETY_MODEL}
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -5,11 +5,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: hf-serverless
@@ -20,7 +20,7 @@ providers:
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -103,7 +103,7 @@ models:
   provider_id: sentence-transformers
   model_type: embedding
 shields: []
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []

@@ -4,7 +4,7 @@ distribution_spec:
   providers:
     inference:
     - inline::meta-reference
-    memory:
+    vector_io:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector

@@ -19,14 +19,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
-from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
 
 
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["inline::meta-reference"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
@@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-    memory_provider = Provider(
+    vector_io_provider = Provider(
         provider_id="faiss",
         provider_type="inline::faiss",
         config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@@ -103,7 +103,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                 },
                 default_models=[inference_model, embedding_model],
                 default_tool_groups=default_tool_groups,
@@ -122,7 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
                            ),
                        ),
                    ],
-                    "memory": [memory_provider],
+                    "vector_io": [vector_io_provider],
                },
                default_models=[
                    inference_model,


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: meta-reference-inference - provider_id: meta-reference-inference
@ -27,7 +27,7 @@ providers:
model: ${env.SAFETY_MODEL} model: ${env.SAFETY_MODEL}
max_seq_len: 4096 max_seq_len: 4096
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null} checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -115,7 +115,7 @@ models:
model_type: embedding model_type: embedding
shields: shields:
- shield_id: ${env.SAFETY_MODEL} - shield_id: ${env.SAFETY_MODEL}
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: meta-reference-inference - provider_id: meta-reference-inference
@ -21,7 +21,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -104,7 +104,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- inline::meta-reference-quantized - inline::meta-reference-quantized
memory: vector_io:
- inline::faiss - inline::faiss
- remote::chromadb - remote::chromadb
- remote::pgvector - remote::pgvector


@ -14,14 +14,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["inline::meta-reference-quantized"], "inference": ["inline::meta-reference-quantized"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],
@ -64,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[inference_model, embedding_model], default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: meta-reference-inference - provider_id: meta-reference-inference
@ -23,7 +23,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -106,7 +106,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- remote::nvidia - remote::nvidia
memory: vector_io:
- inline::faiss - inline::faiss
safety: safety:
- inline::llama-guard - inline::llama-guard


@ -17,7 +17,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["remote::nvidia"], "inference": ["remote::nvidia"],
"memory": ["inline::faiss"], "vector_io": ["inline::faiss"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: nvidia - provider_id: nvidia
@ -17,7 +17,7 @@ providers:
config: config:
url: https://integrate.api.nvidia.com url: https://integrate.api.nvidia.com
api_key: ${env.NVIDIA_API_KEY} api_key: ${env.NVIDIA_API_KEY}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -136,7 +136,7 @@ models:
provider_model_id: meta/llama-3.2-90b-vision-instruct provider_model_id: meta/llama-3.2-90b-vision-instruct
model_type: llm model_type: llm
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- remote::ollama - remote::ollama
memory: vector_io:
- inline::faiss - inline::faiss
- remote::chromadb - remote::chromadb
- remote::pgvector - remote::pgvector


@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["remote::ollama"], "inference": ["remote::ollama"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],
@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[inference_model, embedding_model], default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
@ -109,7 +109,7 @@ def get_distribution_template() -> DistributionTemplate:
inference_provider, inference_provider,
embedding_provider, embedding_provider,
], ],
"memory": [memory_provider], "vector_io": [vector_io_provider],
"safety": [ "safety": [
Provider( Provider(
provider_id="llama-guard", provider_id="llama-guard",


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: ollama - provider_id: ollama
@ -19,7 +19,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -110,7 +110,7 @@ shields:
provider_id: llama-guard provider_id: llama-guard
- shield_id: CodeScanner - shield_id: CodeScanner
provider_id: code-scanner provider_id: code-scanner
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: ollama - provider_id: ollama
@ -19,7 +19,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -99,7 +99,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- remote::vllm - remote::vllm
memory: vector_io:
- inline::faiss - inline::faiss
- remote::chromadb - remote::chromadb
- remote::pgvector - remote::pgvector


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -27,7 +27,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -115,7 +115,7 @@ models:
model_type: embedding model_type: embedding
shields: shields:
- shield_id: ${env.SAFETY_MODEL} - shield_id: ${env.SAFETY_MODEL}
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: vllm-inference - provider_id: vllm-inference
@ -21,7 +21,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -104,7 +104,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["remote::vllm"], "inference": ["remote::vllm"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"eval": ["inline::meta-reference"], "eval": ["inline::meta-reference"],
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[inference_model, embedding_model], default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
), ),
embedding_provider, embedding_provider,
], ],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[ default_models=[
inference_model, inference_model,


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- remote::tgi - remote::tgi
memory: vector_io:
- inline::faiss - inline::faiss
- remote::chromadb - remote::chromadb
- remote::pgvector - remote::pgvector


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: tgi-inference - provider_id: tgi-inference
@ -20,7 +20,7 @@ providers:
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_SAFETY_URL} url: ${env.TGI_SAFETY_URL}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -103,7 +103,7 @@ models:
model_type: llm model_type: llm
shields: shields:
- shield_id: ${env.SAFETY_MODEL} - shield_id: ${env.SAFETY_MODEL}
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: tgi-inference - provider_id: tgi-inference
@ -19,7 +19,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -102,7 +102,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import TGIImplConfig from llama_stack.providers.remote.inference.tgi import TGIImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["remote::tgi"], "inference": ["remote::tgi"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::sentence-transformers", provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(), config=SentenceTransformersInferenceConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -101,7 +101,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[inference_model, embedding_model], default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
), ),
), ),
], ],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[ default_models=[
inference_model, inference_model,


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: together - provider_id: together
@ -20,7 +20,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -156,7 +156,7 @@ shields:
provider_id: llama-guard-vision provider_id: llama-guard-vision
- shield_id: CodeScanner - shield_id: CodeScanner
provider_id: code-scanner provider_id: code-scanner
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- vector_io
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: together - provider_id: together
@ -145,6 +145,7 @@ models:
model_type: embedding model_type: embedding
shields: shields:
- shield_id: meta-llama/Llama-Guard-3-8B - shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
from llama_stack.providers.inline.inference.sentence_transformers import ( from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["remote::together"], "inference": ["remote::together"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],
@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="remote::together", provider_type="remote::together",
config=TogetherImplConfig.sample_run_config(), config=TogetherImplConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=default_models + [embedding_model], default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,
@ -117,7 +117,7 @@ def get_distribution_template() -> DistributionTemplate:
inference_provider, inference_provider,
embedding_provider, embedding_provider,
], ],
"memory": [memory_provider], "vector_io": [vector_io_provider],
"safety": [ "safety": [
Provider( Provider(
provider_id="llama-guard", provider_id="llama-guard",


@ -4,7 +4,7 @@ distribution_spec:
providers: providers:
inference: inference:
- inline::vllm - inline::vllm
memory: vector_io:
- inline::faiss - inline::faiss
- remote::chromadb - remote::chromadb
- remote::pgvector - remote::pgvector


@ -5,11 +5,11 @@ apis:
- datasetio - datasetio
- eval - eval
- inference - inference
- memory
- safety - safety
- scoring - scoring
- telemetry - telemetry
- tool_runtime - tool_runtime
- vector_io
providers: providers:
inference: inference:
- provider_id: vllm - provider_id: vllm
@ -23,7 +23,7 @@ providers:
- provider_id: sentence-transformers - provider_id: sentence-transformers
provider_type: inline::sentence-transformers provider_type: inline::sentence-transformers
config: {} config: {}
memory: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
config: config:
@ -106,7 +106,7 @@ models:
provider_id: sentence-transformers provider_id: sentence-transformers
model_type: embedding model_type: embedding
shields: [] shields: []
memory_banks: [] vector_dbs: []
datasets: [] datasets: []
scoring_fns: [] scoring_fns: []
eval_tasks: [] eval_tasks: []


@ -10,7 +10,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig, SentenceTransformersInferenceConfig,
) )
from llama_stack.providers.inline.inference.vllm import VLLMConfig from llama_stack.providers.inline.inference.vllm import VLLMConfig
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
from llama_stack.templates.template import ( from llama_stack.templates.template import (
DistributionTemplate, DistributionTemplate,
RunConfigSettings, RunConfigSettings,
@ -21,7 +21,7 @@ from llama_stack.templates.template import (
def get_distribution_template() -> DistributionTemplate: def get_distribution_template() -> DistributionTemplate:
providers = { providers = {
"inference": ["inline::vllm"], "inference": ["inline::vllm"],
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"], "safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"], "agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"],
@ -43,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_type="inline::vllm", provider_type="inline::vllm",
config=VLLMConfig.sample_run_config(), config=VLLMConfig.sample_run_config(),
) )
memory_provider = Provider( vector_io_provider = Provider(
provider_id="faiss", provider_id="faiss",
provider_type="inline::faiss", provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"), config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={
"inference": [inference_provider, embedding_provider], "inference": [inference_provider, embedding_provider],
"memory": [memory_provider], "vector_io": [vector_io_provider],
}, },
default_models=[inference_model, embedding_model], default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups, default_tool_groups=default_tool_groups,