mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 18:50:44 +00:00
Update templates
This commit is contained in:
parent
5605917361
commit
33ea91364e
68 changed files with 272 additions and 281 deletions
|
@ -8,11 +8,11 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::nvidia` |
|
| inference | `remote::nvidia` |
|
||||||
| memory | `inline::faiss` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss` |
|
||||||
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
|
@ -15,11 +15,11 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::bedrock` |
|
| inference | `remote::bedrock` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `remote::bedrock` |
|
| safety | `remote::bedrock` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,11 +8,11 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::cerebras` |
|
| inference | `remote::cerebras` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::fireworks` |
|
| inference | `remote::fireworks` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `inline::meta-reference` |
|
| inference | `inline::meta-reference` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
|
Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.
|
||||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `inline::meta-reference-quantized` |
|
| inference | `inline::meta-reference-quantized` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
|
The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.
|
||||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::ollama` |
|
| inference | `remote::ollama` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

### Environment Variables
|
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

### Environment Variables
|
||||||
|
|
|
@ -17,11 +17,11 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::vllm` |
|
| inference | `remote::vllm` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.
|
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.
|
||||||
|
|
|
@ -19,11 +19,11 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::tgi` |
|
| inference | `remote::tgi` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.
|
You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.
|
||||||
|
|
|
@ -18,11 +18,11 @@ The `llamastack/distribution-together` distribution consists of the following pr
|
||||||
| datasetio | `remote::huggingface`, `inline::localfs` |
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
| eval | `inline::meta-reference` |
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::together` |
|
| inference | `remote::together` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime`, `remote::model-context-protocol` |
|
||||||
|
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
|
|
||||||
|
|
||||||
### Environment Variables
|
### Environment Variables
|
||||||
|
|
|
@ -88,7 +88,7 @@ class MemoryRetrievalStep(StepCommon):
|
||||||
step_type: Literal[StepType.memory_retrieval.value] = (
|
step_type: Literal[StepType.memory_retrieval.value] = (
|
||||||
StepType.memory_retrieval.value
|
StepType.memory_retrieval.value
|
||||||
)
|
)
|
||||||
memory_bank_ids: List[str]
|
vector_db_ids: str
|
||||||
inserted_context: InterleavedContent
|
inserted_context: InterleavedContent
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -208,7 +208,7 @@ class EventLogger:
|
||||||
):
|
):
|
||||||
details = event.payload.step_details
|
details = event.payload.step_details
|
||||||
inserted_context = interleaved_content_as_str(details.inserted_context)
|
inserted_context = interleaved_content_as_str(details.inserted_context)
|
||||||
content = f"fetched {len(inserted_context)} bytes from {details.memory_bank_ids}"
|
content = f"fetched {len(inserted_context)} bytes from {details.vector_db_ids}"
|
||||||
|
|
||||||
yield (
|
yield (
|
||||||
event,
|
event,
|
||||||
|
|
|
@ -37,5 +37,5 @@ class Resource(BaseModel):
|
||||||
provider_id: str = Field(description="ID of the provider that owns this resource")
|
provider_id: str = Field(description="ID of the provider that owns this resource")
|
||||||
|
|
||||||
type: ResourceType = Field(
|
type: ResourceType = Field(
|
||||||
description="Type of resource (e.g. 'model', 'shield', 'memory_bank', etc.)"
|
description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)"
|
||||||
)
|
)
|
||||||
|
|
|
@ -9,7 +9,7 @@ import os
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from llama_stack.apis.inference import Model
|
from llama_stack.apis.inference import Model
|
||||||
from llama_stack.apis.memory_banks import VectorMemoryBank
|
from llama_stack.apis.vector_dbs import VectorDB
|
||||||
|
|
||||||
from llama_stack.distribution.store.registry import (
|
from llama_stack.distribution.store.registry import (
|
||||||
CachedDiskDistributionRegistry,
|
CachedDiskDistributionRegistry,
|
||||||
|
@ -42,13 +42,12 @@ async def cached_registry(config):
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_bank():
|
def sample_vector_db():
|
||||||
return VectorMemoryBank(
|
return VectorDB(
|
||||||
identifier="test_bank",
|
identifier="test_vector_db",
|
||||||
embedding_model="all-MiniLM-L6-v2",
|
embedding_model="all-MiniLM-L6-v2",
|
||||||
chunk_size_in_tokens=512,
|
embedding_dimension=384,
|
||||||
overlap_size_in_tokens=64,
|
provider_resource_id="test_vector_db",
|
||||||
provider_resource_id="test_bank",
|
|
||||||
provider_id="test-provider",
|
provider_id="test-provider",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -70,19 +69,17 @@ async def test_registry_initialization(registry):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_basic_registration(registry, sample_bank, sample_model):
|
async def test_basic_registration(registry, sample_vector_db, sample_model):
|
||||||
print(f"Registering {sample_bank}")
|
print(f"Registering {sample_vector_db}")
|
||||||
await registry.register(sample_bank)
|
await registry.register(sample_vector_db)
|
||||||
print(f"Registering {sample_model}")
|
print(f"Registering {sample_model}")
|
||||||
await registry.register(sample_model)
|
await registry.register(sample_model)
|
||||||
print("Getting bank")
|
print("Getting vector_db")
|
||||||
result_bank = await registry.get("memory_bank", "test_bank")
|
result_vector_db = await registry.get("vector_db", "test_vector_db")
|
||||||
assert result_bank is not None
|
assert result_vector_db is not None
|
||||||
assert result_bank.identifier == sample_bank.identifier
|
assert result_vector_db.identifier == sample_vector_db.identifier
|
||||||
assert result_bank.embedding_model == sample_bank.embedding_model
|
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
|
||||||
assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
|
assert result_vector_db.provider_id == sample_vector_db.provider_id
|
||||||
assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
|
|
||||||
assert result_bank.provider_id == sample_bank.provider_id
|
|
||||||
|
|
||||||
result_model = await registry.get("model", "test_model")
|
result_model = await registry.get("model", "test_model")
|
||||||
assert result_model is not None
|
assert result_model is not None
|
||||||
|
@ -91,24 +88,23 @@ async def test_basic_registration(registry, sample_bank, sample_model):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_cached_registry_initialization(config, sample_bank, sample_model):
|
async def test_cached_registry_initialization(config, sample_vector_db, sample_model):
|
||||||
# First populate the disk registry
|
# First populate the disk registry
|
||||||
disk_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
disk_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
||||||
await disk_registry.initialize()
|
await disk_registry.initialize()
|
||||||
await disk_registry.register(sample_bank)
|
await disk_registry.register(sample_vector_db)
|
||||||
await disk_registry.register(sample_model)
|
await disk_registry.register(sample_model)
|
||||||
|
|
||||||
# Test cached version loads from disk
|
# Test cached version loads from disk
|
||||||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||||
await cached_registry.initialize()
|
await cached_registry.initialize()
|
||||||
|
|
||||||
result_bank = await cached_registry.get("memory_bank", "test_bank")
|
result_vector_db = await cached_registry.get("vector_db", "test_vector_db")
|
||||||
assert result_bank is not None
|
assert result_vector_db is not None
|
||||||
assert result_bank.identifier == sample_bank.identifier
|
assert result_vector_db.identifier == sample_vector_db.identifier
|
||||||
assert result_bank.embedding_model == sample_bank.embedding_model
|
assert result_vector_db.embedding_model == sample_vector_db.embedding_model
|
||||||
assert result_bank.chunk_size_in_tokens == sample_bank.chunk_size_in_tokens
|
assert result_vector_db.embedding_dimension == sample_vector_db.embedding_dimension
|
||||||
assert result_bank.overlap_size_in_tokens == sample_bank.overlap_size_in_tokens
|
assert result_vector_db.provider_id == sample_vector_db.provider_id
|
||||||
assert result_bank.provider_id == sample_bank.provider_id
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@ -116,29 +112,28 @@ async def test_cached_registry_updates(config):
|
||||||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||||
await cached_registry.initialize()
|
await cached_registry.initialize()
|
||||||
|
|
||||||
new_bank = VectorMemoryBank(
|
new_vector_db = VectorDB(
|
||||||
identifier="test_bank_2",
|
identifier="test_vector_db_2",
|
||||||
embedding_model="all-MiniLM-L6-v2",
|
embedding_model="all-MiniLM-L6-v2",
|
||||||
chunk_size_in_tokens=256,
|
embedding_dimension=384,
|
||||||
overlap_size_in_tokens=32,
|
provider_resource_id="test_vector_db_2",
|
||||||
provider_resource_id="test_bank_2",
|
|
||||||
provider_id="baz",
|
provider_id="baz",
|
||||||
)
|
)
|
||||||
await cached_registry.register(new_bank)
|
await cached_registry.register(new_vector_db)
|
||||||
|
|
||||||
# Verify in cache
|
# Verify in cache
|
||||||
result_bank = await cached_registry.get("memory_bank", "test_bank_2")
|
result_vector_db = await cached_registry.get("vector_db", "test_vector_db_2")
|
||||||
assert result_bank is not None
|
assert result_vector_db is not None
|
||||||
assert result_bank.identifier == new_bank.identifier
|
assert result_vector_db.identifier == new_vector_db.identifier
|
||||||
assert result_bank.provider_id == new_bank.provider_id
|
assert result_vector_db.provider_id == new_vector_db.provider_id
|
||||||
|
|
||||||
# Verify persisted to disk
|
# Verify persisted to disk
|
||||||
new_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
new_registry = DiskDistributionRegistry(await kvstore_impl(config))
|
||||||
await new_registry.initialize()
|
await new_registry.initialize()
|
||||||
result_bank = await new_registry.get("memory_bank", "test_bank_2")
|
result_vector_db = await new_registry.get("vector_db", "test_vector_db_2")
|
||||||
assert result_bank is not None
|
assert result_vector_db is not None
|
||||||
assert result_bank.identifier == new_bank.identifier
|
assert result_vector_db.identifier == new_vector_db.identifier
|
||||||
assert result_bank.provider_id == new_bank.provider_id
|
assert result_vector_db.provider_id == new_vector_db.provider_id
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
@ -146,30 +141,28 @@ async def test_duplicate_provider_registration(config):
|
||||||
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
cached_registry = CachedDiskDistributionRegistry(await kvstore_impl(config))
|
||||||
await cached_registry.initialize()
|
await cached_registry.initialize()
|
||||||
|
|
||||||
original_bank = VectorMemoryBank(
|
original_vector_db = VectorDB(
|
||||||
identifier="test_bank_2",
|
identifier="test_vector_db_2",
|
||||||
embedding_model="all-MiniLM-L6-v2",
|
embedding_model="all-MiniLM-L6-v2",
|
||||||
chunk_size_in_tokens=256,
|
embedding_dimension=384,
|
||||||
overlap_size_in_tokens=32,
|
provider_resource_id="test_vector_db_2",
|
||||||
provider_resource_id="test_bank_2",
|
|
||||||
provider_id="baz",
|
provider_id="baz",
|
||||||
)
|
)
|
||||||
await cached_registry.register(original_bank)
|
await cached_registry.register(original_vector_db)
|
||||||
|
|
||||||
duplicate_bank = VectorMemoryBank(
|
duplicate_vector_db = VectorDB(
|
||||||
identifier="test_bank_2",
|
identifier="test_vector_db_2",
|
||||||
embedding_model="different-model",
|
embedding_model="different-model",
|
||||||
chunk_size_in_tokens=128,
|
embedding_dimension=384,
|
||||||
overlap_size_in_tokens=16,
|
provider_resource_id="test_vector_db_2",
|
||||||
provider_resource_id="test_bank_2",
|
|
||||||
provider_id="baz", # Same provider_id
|
provider_id="baz", # Same provider_id
|
||||||
)
|
)
|
||||||
await cached_registry.register(duplicate_bank)
|
await cached_registry.register(duplicate_vector_db)
|
||||||
|
|
||||||
result = await cached_registry.get("memory_bank", "test_bank_2")
|
result = await cached_registry.get("vector_db", "test_vector_db_2")
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert (
|
assert (
|
||||||
result.embedding_model == original_bank.embedding_model
|
result.embedding_model == original_vector_db.embedding_model
|
||||||
) # Original values preserved
|
) # Original values preserved
|
||||||
|
|
||||||
|
|
||||||
|
@ -179,36 +172,35 @@ async def test_get_all_objects(config):
|
||||||
await cached_registry.initialize()
|
await cached_registry.initialize()
|
||||||
|
|
||||||
# Create multiple test banks
|
# Create multiple test vector_dbs
|
||||||
test_banks = [
|
test_vector_dbs = [
|
||||||
VectorMemoryBank(
|
VectorDB(
|
||||||
identifier=f"test_bank_{i}",
|
identifier=f"test_vector_db_{i}",
|
||||||
embedding_model="all-MiniLM-L6-v2",
|
embedding_model="all-MiniLM-L6-v2",
|
||||||
chunk_size_in_tokens=256,
|
embedding_dimension=384,
|
||||||
overlap_size_in_tokens=32,
|
provider_resource_id=f"test_vector_db_{i}",
|
||||||
provider_resource_id=f"test_bank_{i}",
|
|
||||||
provider_id=f"provider_{i}",
|
provider_id=f"provider_{i}",
|
||||||
)
|
)
|
||||||
for i in range(3)
|
for i in range(3)
|
||||||
]
|
]
|
||||||
|
|
||||||
# Register all banks
|
# Register all vector_dbs
|
||||||
for bank in test_banks:
|
for vector_db in test_vector_dbs:
|
||||||
await cached_registry.register(bank)
|
await cached_registry.register(vector_db)
|
||||||
|
|
||||||
# Test get_all retrieval
|
# Test get_all retrieval
|
||||||
all_results = await cached_registry.get_all()
|
all_results = await cached_registry.get_all()
|
||||||
assert len(all_results) == 3
|
assert len(all_results) == 3
|
||||||
|
|
||||||
# Verify each bank was stored correctly
|
# Verify each vector_db was stored correctly
|
||||||
for original_bank in test_banks:
|
for original_vector_db in test_vector_dbs:
|
||||||
matching_banks = [
|
matching_vector_dbs = [
|
||||||
b for b in all_results if b.identifier == original_bank.identifier
|
v for v in all_results if v.identifier == original_vector_db.identifier
|
||||||
]
|
]
|
||||||
assert len(matching_banks) == 1
|
assert len(matching_vector_dbs) == 1
|
||||||
stored_bank = matching_banks[0]
|
stored_vector_db = matching_vector_dbs[0]
|
||||||
assert stored_bank.embedding_model == original_bank.embedding_model
|
assert stored_vector_db.embedding_model == original_vector_db.embedding_model
|
||||||
assert stored_bank.provider_id == original_bank.provider_id
|
assert stored_vector_db.provider_id == original_vector_db.provider_id
|
||||||
assert stored_bank.chunk_size_in_tokens == original_bank.chunk_size_in_tokens
|
|
||||||
assert (
|
assert (
|
||||||
stored_bank.overlap_size_in_tokens == original_bank.overlap_size_in_tokens
|
stored_vector_db.embedding_dimension
|
||||||
|
== original_vector_db.embedding_dimension
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import streamlit as st
|
|
||||||
from modules.api import llama_stack_api
|
|
||||||
|
|
||||||
|
|
||||||
def memory_banks():
|
|
||||||
st.header("Memory Banks")
|
|
||||||
memory_banks_info = {
|
|
||||||
m.identifier: m.to_dict() for m in llama_stack_api.client.memory_banks.list()
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(memory_banks_info) > 0:
|
|
||||||
selected_memory_bank = st.selectbox(
|
|
||||||
"Select a memory bank", list(memory_banks_info.keys())
|
|
||||||
)
|
|
||||||
st.json(memory_banks_info[selected_memory_bank])
|
|
||||||
else:
|
|
||||||
st.info("No memory banks found")
|
|
|
@ -6,10 +6,10 @@
|
||||||
|
|
||||||
from page.distribution.datasets import datasets
|
from page.distribution.datasets import datasets
|
||||||
from page.distribution.eval_tasks import eval_tasks
|
from page.distribution.eval_tasks import eval_tasks
|
||||||
from page.distribution.memory_banks import memory_banks
|
|
||||||
from page.distribution.models import models
|
from page.distribution.models import models
|
||||||
from page.distribution.scoring_functions import scoring_functions
|
from page.distribution.scoring_functions import scoring_functions
|
||||||
from page.distribution.shields import shields
|
from page.distribution.shields import shields
|
||||||
|
from page.distribution.vector_dbs import vector_dbs
|
||||||
|
|
||||||
from streamlit_option_menu import option_menu
|
from streamlit_option_menu import option_menu
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ from streamlit_option_menu import option_menu
|
||||||
def resources_page():
|
def resources_page():
|
||||||
options = [
|
options = [
|
||||||
"Models",
|
"Models",
|
||||||
"Memory Banks",
|
"Vector Databases",
|
||||||
"Shields",
|
"Shields",
|
||||||
"Scoring Functions",
|
"Scoring Functions",
|
||||||
"Datasets",
|
"Datasets",
|
||||||
|
@ -37,8 +37,8 @@ def resources_page():
|
||||||
)
|
)
|
||||||
if selected_resource == "Eval Tasks":
|
if selected_resource == "Eval Tasks":
|
||||||
eval_tasks()
|
eval_tasks()
|
||||||
elif selected_resource == "Memory Banks":
|
elif selected_resource == "Vector Databases":
|
||||||
memory_banks()
|
vector_dbs()
|
||||||
elif selected_resource == "Datasets":
|
elif selected_resource == "Datasets":
|
||||||
datasets()
|
datasets()
|
||||||
elif selected_resource == "Models":
|
elif selected_resource == "Models":
|
||||||
|
|
23
llama_stack/distribution/ui/page/distribution/vector_dbs.py
Normal file
23
llama_stack/distribution/ui/page/distribution/vector_dbs.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
import streamlit as st
|
||||||
|
from modules.api import llama_stack_api
|
||||||
|
|
||||||
|
|
||||||
|
def vector_dbs():
|
||||||
|
st.header("Vector Databases")
|
||||||
|
vector_dbs_info = {
|
||||||
|
v.identifier: v.to_dict() for v in llama_stack_api.client.vector_dbs.list()
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(vector_dbs_info) > 0:
|
||||||
|
selected_vector_db = st.selectbox(
|
||||||
|
"Select a vector database", list(vector_dbs_info.keys())
|
||||||
|
)
|
||||||
|
st.json(vector_dbs_info[selected_vector_db])
|
||||||
|
else:
|
||||||
|
st.info("No vector databases found")
|
|
@ -29,12 +29,12 @@ def rag_chat_page():
|
||||||
if uploaded_files:
|
if uploaded_files:
|
||||||
st.success(f"Successfully uploaded {len(uploaded_files)} files")
|
st.success(f"Successfully uploaded {len(uploaded_files)} files")
|
||||||
# Add memory bank name input field
|
# Add memory bank name input field
|
||||||
memory_bank_name = st.text_input(
|
vector_db_name = st.text_input(
|
||||||
"Memory Bank Name",
|
"Vector Database Name",
|
||||||
value="rag_bank",
|
value="rag_vector_db",
|
||||||
help="Enter a unique identifier for this memory bank",
|
help="Enter a unique identifier for this vector database",
|
||||||
)
|
)
|
||||||
if st.button("Create Memory Bank"):
|
if st.button("Create Vector Database"):
|
||||||
documents = [
|
documents = [
|
||||||
Document(
|
Document(
|
||||||
document_id=uploaded_file.name,
|
document_id=uploaded_file.name,
|
||||||
|
@ -44,37 +44,33 @@ def rag_chat_page():
|
||||||
]
|
]
|
||||||
|
|
||||||
providers = llama_stack_api.client.providers.list()
|
providers = llama_stack_api.client.providers.list()
|
||||||
memory_provider = None
|
vector_io_provider = None
|
||||||
|
|
||||||
for x in providers:
|
for x in providers:
|
||||||
if x.api == "memory":
|
if x.api == "vector_io":
|
||||||
memory_provider = x.provider_id
|
vector_io_provider = x.provider_id
|
||||||
|
|
||||||
llama_stack_api.client.memory_banks.register(
|
llama_stack_api.client.vector_dbs.register(
|
||||||
memory_bank_id=memory_bank_name, # Use the user-provided name
|
vector_db_id=vector_db_name, # Use the user-provided name
|
||||||
params={
|
embedding_dimension=384,
|
||||||
"memory_bank_type": "vector",
|
embedding_model="all-MiniLM-L6-v2",
|
||||||
"embedding_model": "all-MiniLM-L6-v2",
|
provider_id=vector_io_provider,
|
||||||
"chunk_size_in_tokens": 512,
|
|
||||||
"overlap_size_in_tokens": 64,
|
|
||||||
},
|
|
||||||
provider_id=memory_provider,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# insert documents using the custom bank name
|
# insert documents using the custom vector db name
|
||||||
llama_stack_api.client.memory.insert(
|
llama_stack_api.client.tool_runtime.rag_tool.insert(
|
||||||
bank_id=memory_bank_name, # Use the user-provided name
|
vector_db_id=vector_db_name, # Use the user-provided name
|
||||||
documents=documents,
|
documents=documents,
|
||||||
)
|
)
|
||||||
st.success("Memory bank created successfully!")
|
st.success("Vector database created successfully!")
|
||||||
|
|
||||||
st.subheader("Configure Agent")
|
st.subheader("Configure Agent")
|
||||||
# select memory banks
|
# select memory banks
|
||||||
memory_banks = llama_stack_api.client.memory_banks.list()
|
vector_dbs = llama_stack_api.client.vector_dbs.list()
|
||||||
memory_banks = [bank.identifier for bank in memory_banks]
|
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
|
||||||
selected_memory_banks = st.multiselect(
|
selected_vector_dbs = st.multiselect(
|
||||||
"Select Memory Banks",
|
"Select Vector Databases",
|
||||||
memory_banks,
|
vector_dbs,
|
||||||
)
|
)
|
||||||
|
|
||||||
available_models = llama_stack_api.client.models.list()
|
available_models = llama_stack_api.client.models.list()
|
||||||
|
@ -141,14 +137,14 @@ def rag_chat_page():
|
||||||
dict(
|
dict(
|
||||||
name="builtin::memory",
|
name="builtin::memory",
|
||||||
args={
|
args={
|
||||||
"memory_bank_ids": [bank_id for bank_id in selected_memory_banks],
|
"vector_db_ids": [
|
||||||
|
vector_db_id for vector_db_id in selected_vector_dbs
|
||||||
|
],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
tool_choice="auto",
|
tool_choice="auto",
|
||||||
tool_prompt_format="json",
|
tool_prompt_format="json",
|
||||||
input_shields=[],
|
|
||||||
output_shields=[],
|
|
||||||
enable_session_persistence=False,
|
enable_session_persistence=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -14,8 +14,10 @@ from .config import ChromaInlineImplConfig
|
||||||
async def get_provider_impl(
|
async def get_provider_impl(
|
||||||
config: ChromaInlineImplConfig, deps: Dict[Api, ProviderSpec]
|
config: ChromaInlineImplConfig, deps: Dict[Api, ProviderSpec]
|
||||||
):
|
):
|
||||||
from llama_stack.providers.remote.memory.chroma.chroma import ChromaMemoryAdapter
|
from llama_stack.providers.remote.vector_io.chroma.chroma import (
|
||||||
|
ChromaVectorIOAdapter,
|
||||||
|
)
|
||||||
|
|
||||||
impl = ChromaMemoryAdapter(config, deps[Api.inference])
|
impl = ChromaVectorIOAdapter(config, deps[Api.inference])
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
@ -14,8 +14,8 @@ from .config import ChromaRemoteImplConfig
|
||||||
async def get_adapter_impl(
|
async def get_adapter_impl(
|
||||||
config: ChromaRemoteImplConfig, deps: Dict[Api, ProviderSpec]
|
config: ChromaRemoteImplConfig, deps: Dict[Api, ProviderSpec]
|
||||||
):
|
):
|
||||||
from .chroma import ChromaMemoryAdapter
|
from .chroma import ChromaVectorIOAdapter
|
||||||
|
|
||||||
impl = ChromaMemoryAdapter(config, deps[Api.inference])
|
impl = ChromaVectorIOAdapter(config, deps[Api.inference])
|
||||||
await impl.initialize()
|
await impl.initialize()
|
||||||
return impl
|
return impl
|
||||||
|
|
|
@ -86,13 +86,13 @@ class ChromaIndex(EmbeddingIndex):
|
||||||
await maybe_await(self.client.delete_collection(self.collection.name))
|
await maybe_await(self.client.delete_collection(self.collection.name))
|
||||||
|
|
||||||
|
|
||||||
class ChromaMemoryAdapter(VectorIO, VectorDBsProtocolPrivate):
|
class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
config: Union[ChromaRemoteImplConfig, ChromaInlineImplConfig],
|
config: Union[ChromaRemoteImplConfig, ChromaInlineImplConfig],
|
||||||
inference_api: Api.inference,
|
inference_api: Api.inference,
|
||||||
) -> None:
|
) -> None:
|
||||||
log.info(f"Initializing ChromaMemoryAdapter with url: {config}")
|
log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
|
||||||
self.config = config
|
self.config = config
|
||||||
self.inference_api = inference_api
|
self.inference_api = inference_api
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelInput
|
from llama_stack.apis.models import ModelInput
|
||||||
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
|
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::bedrock"],
|
"inference": ["remote::bedrock"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["remote::bedrock"],
|
"safety": ["remote::bedrock"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -34,7 +34,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
name = "bedrock"
|
name = "bedrock"
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -78,7 +78,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
run_configs={
|
run_configs={
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models,
|
default_models=default_models,
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::bedrock
|
- remote::bedrock
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -5,17 +5,17 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: bedrock
|
- provider_id: bedrock
|
||||||
provider_type: remote::bedrock
|
provider_type: remote::bedrock
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -104,7 +104,7 @@ models:
|
||||||
provider_model_id: meta.llama3-1-405b-instruct-v1:0
|
provider_model_id: meta.llama3-1-405b-instruct-v1:0
|
||||||
model_type: llm
|
model_type: llm
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -6,7 +6,7 @@ distribution_spec:
|
||||||
- remote::cerebras
|
- remote::cerebras
|
||||||
safety:
|
safety:
|
||||||
- inline::llama-guard
|
- inline::llama-guard
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -13,7 +13,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupIn
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
|
from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
|
||||||
from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
|
from llama_stack.providers.remote.inference.cerebras.cerebras import model_aliases
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
@ -23,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::cerebras"],
|
"inference": ["remote::cerebras"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"eval": ["inline::meta-reference"],
|
"eval": ["inline::meta-reference"],
|
||||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
@ -68,7 +68,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"embedding_dimension": 384,
|
"embedding_dimension": 384,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models + [embedding_model],
|
default_models=default_models + [embedding_model],
|
||||||
default_shields=[],
|
default_shields=[],
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: cerebras
|
- provider_id: cerebras
|
||||||
|
@ -24,7 +24,7 @@ providers:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -106,7 +106,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -60,7 +60,7 @@ providers:
|
||||||
- provider_id: llama-guard
|
- provider_id: llama-guard
|
||||||
provider_type: inline::llama-guard
|
provider_type: inline::llama-guard
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -82,7 +82,7 @@ metadata_store:
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||||
models: []
|
models: []
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::fireworks
|
- remote::fireworks
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
||||||
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::fireworks"],
|
"inference": ["remote::fireworks"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -107,7 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models + [embedding_model],
|
default_models=default_models + [embedding_model],
|
||||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||||
|
@ -119,7 +119,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
inference_provider,
|
inference_provider,
|
||||||
embedding_provider,
|
embedding_provider,
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
"safety": [
|
"safety": [
|
||||||
Provider(
|
Provider(
|
||||||
provider_id="llama-guard",
|
provider_id="llama-guard",
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: fireworks
|
- provider_id: fireworks
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -161,7 +161,7 @@ shields:
|
||||||
provider_id: llama-guard-vision
|
provider_id: llama-guard-vision
|
||||||
- shield_id: CodeScanner
|
- shield_id: CodeScanner
|
||||||
provider_id: code-scanner
|
provider_id: code-scanner
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: fireworks
|
- provider_id: fireworks
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -150,7 +150,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::hf::endpoint
|
- remote::hf::endpoint
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
|
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::hf::endpoint"],
|
"inference": ["remote::hf::endpoint"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -97,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -115,7 +115,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: hf-endpoint
|
- provider_id: hf-endpoint
|
||||||
|
@ -25,7 +25,7 @@ providers:
|
||||||
config:
|
config:
|
||||||
endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME}
|
endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME}
|
||||||
api_token: ${env.HF_API_TOKEN}
|
api_token: ${env.HF_API_TOKEN}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -113,7 +113,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- shield_id: ${env.SAFETY_MODEL}
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: hf-endpoint
|
- provider_id: hf-endpoint
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -103,7 +103,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::hf::serverless
|
- remote::hf::serverless
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -14,7 +14,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::hf::serverless"],
|
"inference": ["remote::hf::serverless"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -116,7 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: hf-serverless
|
- provider_id: hf-serverless
|
||||||
|
@ -25,7 +25,7 @@ providers:
|
||||||
config:
|
config:
|
||||||
huggingface_repo: ${env.SAFETY_MODEL}
|
huggingface_repo: ${env.SAFETY_MODEL}
|
||||||
api_token: ${env.HF_API_TOKEN}
|
api_token: ${env.HF_API_TOKEN}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -113,7 +113,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- shield_id: ${env.SAFETY_MODEL}
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: hf-serverless
|
- provider_id: hf-serverless
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -103,7 +103,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -19,14 +19,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["inline::meta-reference"],
|
"inference": ["inline::meta-reference"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -55,7 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -103,7 +103,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -122,7 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: meta-reference-inference
|
- provider_id: meta-reference-inference
|
||||||
|
@ -27,7 +27,7 @@ providers:
|
||||||
model: ${env.SAFETY_MODEL}
|
model: ${env.SAFETY_MODEL}
|
||||||
max_seq_len: 4096
|
max_seq_len: 4096
|
||||||
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
|
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -115,7 +115,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- shield_id: ${env.SAFETY_MODEL}
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: meta-reference-inference
|
- provider_id: meta-reference-inference
|
||||||
|
@ -21,7 +21,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -104,7 +104,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- inline::meta-reference-quantized
|
- inline::meta-reference-quantized
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -14,14 +14,14 @@ from llama_stack.providers.inline.inference.meta_reference import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["inline::meta-reference-quantized"],
|
"inference": ["inline::meta-reference-quantized"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -64,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: meta-reference-inference
|
- provider_id: meta-reference-inference
|
||||||
|
@ -23,7 +23,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -106,7 +106,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::nvidia
|
- remote::nvidia
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
safety:
|
safety:
|
||||||
- inline::llama-guard
|
- inline::llama-guard
|
||||||
|
|
|
@ -17,7 +17,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::nvidia"],
|
"inference": ["remote::nvidia"],
|
||||||
"memory": ["inline::faiss"],
|
"vector_io": ["inline::faiss"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: nvidia
|
- provider_id: nvidia
|
||||||
|
@ -17,7 +17,7 @@ providers:
|
||||||
config:
|
config:
|
||||||
url: https://integrate.api.nvidia.com
|
url: https://integrate.api.nvidia.com
|
||||||
api_key: ${env.NVIDIA_API_KEY}
|
api_key: ${env.NVIDIA_API_KEY}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -136,7 +136,7 @@ models:
|
||||||
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
provider_model_id: meta/llama-3.2-90b-vision-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::ollama
|
- remote::ollama
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::ollama"],
|
"inference": ["remote::ollama"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -49,7 +49,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -98,7 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -109,7 +109,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
inference_provider,
|
inference_provider,
|
||||||
embedding_provider,
|
embedding_provider,
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
"safety": [
|
"safety": [
|
||||||
Provider(
|
Provider(
|
||||||
provider_id="llama-guard",
|
provider_id="llama-guard",
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: ollama
|
- provider_id: ollama
|
||||||
|
@ -19,7 +19,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -110,7 +110,7 @@ shields:
|
||||||
provider_id: llama-guard
|
provider_id: llama-guard
|
||||||
- shield_id: CodeScanner
|
- shield_id: CodeScanner
|
||||||
provider_id: code-scanner
|
provider_id: code-scanner
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: ollama
|
- provider_id: ollama
|
||||||
|
@ -19,7 +19,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -99,7 +99,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::vllm
|
- remote::vllm
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm-inference
|
- provider_id: vllm-inference
|
||||||
|
@ -27,7 +27,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -115,7 +115,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- shield_id: ${env.SAFETY_MODEL}
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm-inference
|
- provider_id: vllm-inference
|
||||||
|
@ -21,7 +21,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -104,7 +104,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::vllm"],
|
"inference": ["remote::vllm"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"eval": ["inline::meta-reference"],
|
"eval": ["inline::meta-reference"],
|
||||||
|
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
embedding_provider,
|
embedding_provider,
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- remote::tgi
|
- remote::tgi
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: tgi-inference
|
- provider_id: tgi-inference
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
provider_type: remote::tgi
|
provider_type: remote::tgi
|
||||||
config:
|
config:
|
||||||
url: ${env.TGI_SAFETY_URL}
|
url: ${env.TGI_SAFETY_URL}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -103,7 +103,7 @@ models:
|
||||||
model_type: llm
|
model_type: llm
|
||||||
shields:
|
shields:
|
||||||
- shield_id: ${env.SAFETY_MODEL}
|
- shield_id: ${env.SAFETY_MODEL}
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: tgi-inference
|
- provider_id: tgi-inference
|
||||||
|
@ -19,7 +19,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -102,7 +102,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::tgi"],
|
"inference": ["remote::tgi"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -52,7 +52,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::sentence-transformers",
|
provider_type="inline::sentence-transformers",
|
||||||
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -101,7 +101,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -118,7 +118,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: together
|
- provider_id: together
|
||||||
|
@ -20,7 +20,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -156,7 +156,7 @@ shields:
|
||||||
provider_id: llama-guard-vision
|
provider_id: llama-guard-vision
|
||||||
- shield_id: CodeScanner
|
- shield_id: CodeScanner
|
||||||
provider_id: code-scanner
|
provider_id: code-scanner
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- vector_io
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: together
|
- provider_id: together
|
||||||
|
@ -145,6 +145,7 @@ models:
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields:
|
shields:
|
||||||
- shield_id: meta-llama/Llama-Guard-3-8B
|
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||||
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -18,7 +18,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||||
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
@ -27,7 +27,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["remote::together"],
|
"inference": ["remote::together"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -48,7 +48,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="remote::together",
|
provider_type="remote::together",
|
||||||
config=TogetherImplConfig.sample_run_config(),
|
config=TogetherImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -105,7 +105,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models + [embedding_model],
|
default_models=default_models + [embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
@ -117,7 +117,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
inference_provider,
|
inference_provider,
|
||||||
embedding_provider,
|
embedding_provider,
|
||||||
],
|
],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
"safety": [
|
"safety": [
|
||||||
Provider(
|
Provider(
|
||||||
provider_id="llama-guard",
|
provider_id="llama-guard",
|
||||||
|
|
|
@ -4,7 +4,7 @@ distribution_spec:
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- inline::vllm
|
- inline::vllm
|
||||||
memory:
|
vector_io:
|
||||||
- inline::faiss
|
- inline::faiss
|
||||||
- remote::chromadb
|
- remote::chromadb
|
||||||
- remote::pgvector
|
- remote::pgvector
|
||||||
|
|
|
@ -5,11 +5,11 @@ apis:
|
||||||
- datasetio
|
- datasetio
|
||||||
- eval
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
|
||||||
- safety
|
- safety
|
||||||
- scoring
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
- tool_runtime
|
- tool_runtime
|
||||||
|
- vector_io
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm
|
- provider_id: vllm
|
||||||
|
@ -23,7 +23,7 @@ providers:
|
||||||
- provider_id: sentence-transformers
|
- provider_id: sentence-transformers
|
||||||
provider_type: inline::sentence-transformers
|
provider_type: inline::sentence-transformers
|
||||||
config: {}
|
config: {}
|
||||||
memory:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_id: faiss
|
||||||
provider_type: inline::faiss
|
provider_type: inline::faiss
|
||||||
config:
|
config:
|
||||||
|
@ -106,7 +106,7 @@ models:
|
||||||
provider_id: sentence-transformers
|
provider_id: sentence-transformers
|
||||||
model_type: embedding
|
model_type: embedding
|
||||||
shields: []
|
shields: []
|
||||||
memory_banks: []
|
vector_dbs: []
|
||||||
datasets: []
|
datasets: []
|
||||||
scoring_fns: []
|
scoring_fns: []
|
||||||
eval_tasks: []
|
eval_tasks: []
|
||||||
|
|
|
@ -10,7 +10,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
||||||
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
from llama_stack.providers.inline.vector_io.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import (
|
from llama_stack.templates.template import (
|
||||||
DistributionTemplate,
|
DistributionTemplate,
|
||||||
RunConfigSettings,
|
RunConfigSettings,
|
||||||
|
@ -21,7 +21,7 @@ from llama_stack.templates.template import (
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": ["inline::vllm"],
|
"inference": ["inline::vllm"],
|
||||||
"memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
@ -43,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_type="inline::vllm",
|
provider_type="inline::vllm",
|
||||||
config=VLLMConfig.sample_run_config(),
|
config=VLLMConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
memory_provider = Provider(
|
vector_io_provider = Provider(
|
||||||
provider_id="faiss",
|
provider_id="faiss",
|
||||||
provider_type="inline::faiss",
|
provider_type="inline::faiss",
|
||||||
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider, embedding_provider],
|
"inference": [inference_provider, embedding_provider],
|
||||||
"memory": [memory_provider],
|
"vector_io": [vector_io_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model, embedding_model],
|
default_models=[inference_model, embedding_model],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue