Merge branch 'main' of github.com:llamastack/llama-stack

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
Antony Sallas 2025-10-22 18:22:40 +08:00
commit b469ffce35
334 changed files with 96977 additions and 19023 deletions

View file

@ -545,6 +545,7 @@ class OpenAIResponseObject(BaseModel):
:param tools: (Optional) An array of tools the model may call while generating a response.
:param truncation: (Optional) Truncation strategy applied to the response
:param usage: (Optional) Token usage information for the response
:param instructions: (Optional) System message inserted into the model's context
"""
created_at: int
@ -564,6 +565,7 @@ class OpenAIResponseObject(BaseModel):
tools: list[OpenAIResponseTool] | None = None
truncation: str | None = None
usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@json_schema_type
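A minimal sketch of how the new optional `instructions` field behaves; the class below is a reduced, illustrative stand-in for OpenAIResponseObject (the real model has many more fields than this hunk shows), not the actual schema.

from pydantic import BaseModel

# Reduced stand-in for OpenAIResponseObject, illustration only: it pairs the
# new optional `instructions` field with two of the existing optional fields.
class ResponseSketch(BaseModel):
    created_at: int
    truncation: str | None = None
    instructions: str | None = None

resp = ResponseSketch(created_at=1730000000, instructions="You are a terse assistant.")
assert resp.instructions == "You are a terse assistant."
print(resp.model_dump(exclude_none=True))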

View file

@ -173,7 +173,9 @@ class ConversationItemDeletedResource(BaseModel):
@runtime_checkable
@trace_protocol
class Conversations(Protocol):
"""Protocol for conversation management operations."""
"""Conversations
Protocol for conversation management operations."""
@webmethod(route="/conversations", method="POST", level=LLAMA_STACK_API_V1)
async def create_conversation(
@ -181,6 +183,8 @@ class Conversations(Protocol):
) -> Conversation:
"""Create a conversation.
Create a conversation.
:param items: Initial items to include in the conversation context.
:param metadata: Set of key-value pairs that can be attached to an object.
:returns: The created conversation object.
@ -189,7 +193,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_conversation(self, conversation_id: str) -> Conversation:
"""Get a conversation with the given ID.
"""Retrieve a conversation.
Get a conversation with the given ID.
:param conversation_id: The conversation identifier.
:returns: The conversation object.
@ -198,7 +204,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="POST", level=LLAMA_STACK_API_V1)
async def update_conversation(self, conversation_id: str, metadata: Metadata) -> Conversation:
"""Update a conversation's metadata with the given ID.
"""Update a conversation.
Update a conversation's metadata with the given ID.
:param conversation_id: The conversation identifier.
:param metadata: Set of key-value pairs that can be attached to an object.
@ -208,7 +216,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_conversation(self, conversation_id: str) -> ConversationDeletedResource:
"""Delete a conversation with the given ID.
"""Delete a conversation.
Delete a conversation with the given ID.
:param conversation_id: The conversation identifier.
:returns: The deleted conversation resource.
@ -217,7 +227,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}/items", method="POST", level=LLAMA_STACK_API_V1)
async def add_items(self, conversation_id: str, items: list[ConversationItem]) -> ConversationItemList:
"""Create items in the conversation.
"""Create items.
Create items in the conversation.
:param conversation_id: The conversation identifier.
:param items: Items to include in the conversation context.
@ -227,7 +239,9 @@ class Conversations(Protocol):
@webmethod(route="/conversations/{conversation_id}/items/{item_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve(self, conversation_id: str, item_id: str) -> ConversationItem:
"""Retrieve a conversation item.
"""Retrieve an item.
Retrieve a conversation item.
:param conversation_id: The conversation identifier.
:param item_id: The item identifier.
@ -244,7 +258,9 @@ class Conversations(Protocol):
limit: int | NotGiven = NOT_GIVEN,
order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
) -> ConversationItemList:
"""List items in the conversation.
"""List items.
List items in the conversation.
:param conversation_id: The conversation identifier.
:param after: An item ID to list items after, used in pagination.
@ -259,7 +275,9 @@ class Conversations(Protocol):
async def openai_delete_conversation_item(
self, conversation_id: str, item_id: str
) -> ConversationItemDeletedResource:
"""Delete a conversation item.
"""Delete an item.
Delete a conversation item.
:param conversation_id: The conversation identifier.
:param item_id: The item identifier.

View file

@ -121,6 +121,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
models = "models"
shields = "shields"
vector_stores = "vector_stores" # only used for routing table
datasets = "datasets"
scoring_functions = "scoring_functions"
benchmarks = "benchmarks"

View file

@ -82,7 +82,9 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
"""Evaluations
Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)

View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
class ResourceType(StrEnum):
model = "model"
shield = "shield"
vector_db = "vector_db"
vector_store = "vector_store"
dataset = "dataset"
scoring_function = "scoring_function"
benchmark = "benchmark"
@ -34,4 +34,4 @@ class Resource(BaseModel):
provider_id: str = Field(description="ID of the provider that owns this resource")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_db', etc.)")
type: ResourceType = Field(description="Type of resource (e.g. 'model', 'shield', 'vector_store', etc.)")
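Since ResourceType derives from StrEnum, members compare equal to plain strings; a quick check of the rename (assumes an environment with this version of llama-stack installed):

from llama_stack.apis.resource import ResourceType

assert ResourceType.vector_store == "vector_store"
# Per this diff, the old vector_db member is gone from the enum.
assert "vector_db" not in {member.value for member in ResourceType}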

View file

@ -15,7 +15,7 @@ from fastapi import Body
from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
@ -140,6 +140,7 @@ class VectorStoreFileCounts(BaseModel):
total: int
# TODO: rename this as OpenAIVectorStore
@json_schema_type
class VectorStoreObject(BaseModel):
"""OpenAI Vector Store object.
@ -517,17 +518,18 @@ class OpenAICreateVectorStoreFileBatchRequestWithExtraBody(BaseModel, extra="all
chunking_strategy: VectorStoreChunkingStrategy | None = None
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
class VectorStoreTable(Protocol):
def get_vector_store(self, vector_store_id: str) -> VectorStore | None: ...
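Because VectorStoreTable is a Protocol, any object with a matching get_vector_store method satisfies it structurally; a dict-backed sketch, for illustration only:

from llama_stack.apis.vector_stores import VectorStore

class DictVectorStoreTable:
    # Satisfies the VectorStoreTable protocol above without explicit inheritance.
    def __init__(self, stores: dict[str, VectorStore]):
        self._stores = stores

    def get_vector_store(self, vector_store_id: str) -> VectorStore | None:
        return self._stores.get(vector_store_id)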
@runtime_checkable
@trace_protocol
class VectorIO(Protocol):
vector_db_store: VectorDBStore | None = None
vector_store_table: VectorStoreTable | None = None
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks(
self,
@ -546,6 +548,7 @@ class VectorIO(Protocol):
"""
...
# TODO: rename vector_db_id to vector_store_id once Stainless is working
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks(
self,

View file

@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .vector_stores import *

View file

@ -9,53 +9,43 @@ from typing import Literal
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class VectorDB(Resource):
# Internal resource type for storing the vector store routing and other information
class VectorStore(Resource):
"""Vector database resource for storing and querying vector embeddings.
:param type: Type of resource, always 'vector_db' for vector databases
:param type: Type of resource, always 'vector_store' for vector stores
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
"""
type: Literal[ResourceType.vector_db] = ResourceType.vector_db
type: Literal[ResourceType.vector_store] = ResourceType.vector_store
embedding_model: str
embedding_dimension: int
vector_db_name: str | None = None
vector_store_name: str | None = None
@property
def vector_db_id(self) -> str:
def vector_store_id(self) -> str:
return self.identifier
@property
def provider_vector_db_id(self) -> str | None:
def provider_vector_store_id(self) -> str | None:
return self.provider_resource_id
class VectorDBInput(BaseModel):
class VectorStoreInput(BaseModel):
"""Input parameters for creating or configuring a vector database.
:param vector_db_id: Unique identifier for the vector database
:param vector_store_id: Unique identifier for the vector store
:param embedding_model: Name of the embedding model to use for vector generation
:param embedding_dimension: Dimension of the embedding vectors
:param provider_vector_db_id: (Optional) Provider-specific identifier for the vector database
:param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
"""
vector_db_id: str
vector_store_id: str
embedding_model: str
embedding_dimension: int
provider_id: str | None = None
provider_vector_db_id: str | None = None
class ListVectorDBsResponse(BaseModel):
"""Response from listing vector databases.
:param data: List of vector databases
"""
data: list[VectorDB]
provider_vector_store_id: str | None = None
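A hedged sketch of constructing the renamed input model. It assumes the star re-export in the new package __init__ makes VectorStoreInput importable from llama_stack.apis.vector_stores; the identifier, model name, dimension, and provider id below are placeholders.

from llama_stack.apis.vector_stores import VectorStoreInput

vs_input = VectorStoreInput(
    vector_store_id="docs-main",          # placeholder identifier
    embedding_model="all-MiniLM-L6-v2",   # assumed example model name
    embedding_dimension=384,
    provider_id="faiss",                  # assumed provider id
)
print(vs_input.model_dump(exclude_none=True))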

View file

@ -6,6 +6,8 @@
import argparse
from llama_stack.log import setup_logging
from .stack import StackParser
from .stack.utils import print_subcommand_description
@ -42,6 +44,9 @@ class LlamaCLIParser:
def main():
# Initialize logging from environment variables before any other operations
setup_logging()
parser = LlamaCLIParser()
args = parser.parse_args()
parser.run(args)

View file

@ -1,490 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import importlib.resources
import json
import os
import shutil
import sys
import textwrap
from functools import lru_cache
from importlib.abc import Traversable
from pathlib import Path
import yaml
from prompt_toolkit import prompt
from prompt_toolkit.completion import WordCompleter
from prompt_toolkit.validation import Validator
from termcolor import colored, cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.table import print_table
from llama_stack.core.build import (
SERVER_DEPENDENCIES,
build_image,
get_provider_dependencies,
)
from llama_stack.core.configure import parse_and_maybe_upgrade_config
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
Provider,
StackRunConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.stack import replace_env_vars
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@lru_cache
def available_distros_specs() -> dict[str, BuildConfig]:
import yaml
distro_specs = {}
for p in DISTRIBS_PATH.rglob("*build.yaml"):
distro_name = p.parent.name
with open(p) as f:
build_config = BuildConfig(**yaml.safe_load(f))
distro_specs[distro_name] = build_config
return distro_specs
def run_stack_build_command(args: argparse.Namespace) -> None:
if args.list_distros:
return _run_distro_list_cmd()
if args.image_type == ImageType.VENV.value:
current_venv = os.environ.get("VIRTUAL_ENV")
image_name = args.image_name or current_venv
else:
image_name = args.image_name
if args.template:
cprint(
"The --template argument is deprecated. Please use --distro instead.",
color="red",
file=sys.stderr,
)
distro_name = args.template
else:
distro_name = args.distribution
if distro_name:
available_distros = available_distros_specs()
if distro_name not in available_distros:
cprint(
f"Could not find distribution {distro_name}. Please run `llama stack build --list-distros` to check out the available distributions",
color="red",
file=sys.stderr,
)
sys.exit(1)
build_config = available_distros[distro_name]
if args.image_type:
build_config.image_type = args.image_type
else:
cprint(
f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {distro_name}",
color="red",
file=sys.stderr,
)
sys.exit(1)
elif args.providers:
provider_list: dict[str, list[BuildProvider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
provider = BuildProvider(
provider_type=provider_type,
module=None,
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
distribution_spec = DistributionSpec(
providers=provider_list,
description=",".join(args.providers),
)
if not args.image_type:
cprint(
f"Please specify a image-type (container | venv) for {args.template}",
color="red",
file=sys.stderr,
)
sys.exit(1)
build_config = BuildConfig(image_type=args.image_type, distribution_spec=distribution_spec)
elif not args.config and not distro_name:
name = prompt(
"> Enter a name for your Llama Stack (e.g. my-local-stack): ",
validator=Validator.from_callable(
lambda x: len(x) > 0,
error_message="Name cannot be empty, please enter a name",
),
)
image_type = prompt(
"> Enter the image type you want your Llama Stack to be built as (use <TAB> to see options): ",
completer=WordCompleter([e.value for e in ImageType]),
complete_while_typing=True,
validator=Validator.from_callable(
lambda x: x in [e.value for e in ImageType],
error_message="Invalid image type. Use <TAB> to see options",
),
)
image_name = f"llamastack-{name}"
cprint(
textwrap.dedent(
"""
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
""",
),
color="green",
file=sys.stderr,
)
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
providers: dict[str, list[BuildProvider]] = dict()
for api, providers_for_api in get_provider_registry().items():
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
if not available_providers:
continue
api_provider = prompt(
f"> Enter provider for API {api.value}: ",
completer=WordCompleter(available_providers),
complete_while_typing=True,
validator=Validator.from_callable(
lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
error_message="Invalid provider, use <TAB> to see options",
),
)
string_providers = api_provider.split(" ")
for provider in string_providers:
providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
description = prompt(
"\n > (Optional) Enter a short description for your Llama Stack: ",
default="",
)
distribution_spec = DistributionSpec(
providers=providers,
description=description,
)
build_config = BuildConfig(image_type=image_type, distribution_spec=distribution_spec)
else:
with open(args.config) as f:
try:
contents = yaml.safe_load(f)
contents = replace_env_vars(contents)
build_config = BuildConfig(**contents)
if args.image_type:
build_config.image_type = args.image_type
except Exception as e:
cprint(
f"Could not parse config file {args.config}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
if args.print_deps_only:
print(f"# Dependencies for {distro_name or args.config or image_name}")
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
normal_deps += SERVER_DEPENDENCIES
print(f"uv pip install {' '.join(normal_deps)}")
for special_dep in special_deps:
print(f"uv pip install {special_dep}")
for external_dep in external_provider_dependencies:
print(f"uv pip install {external_dep}")
return
try:
run_config = _run_stack_build_command_from_build_config(
build_config,
image_name=image_name,
config_path=args.config,
distro_name=distro_name,
)
except (Exception, RuntimeError) as exc:
import traceback
cprint(
f"Error building stack: {exc}",
color="red",
file=sys.stderr,
)
cprint("Stack trace:", color="red", file=sys.stderr)
traceback.print_exc()
sys.exit(1)
if run_config is None:
cprint(
"Run config path is empty",
color="red",
file=sys.stderr,
)
sys.exit(1)
if args.run:
config_dict = yaml.safe_load(run_config.read_text())
config = parse_and_maybe_upgrade_config(config_dict)
if config.external_providers_dir and not config.external_providers_dir.exists():
config.external_providers_dir.mkdir(exist_ok=True)
run_args = formulate_run_args(args.image_type, image_name or config.image_name)
run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", str(run_config)])
run_command(run_args)
def _generate_run_config(
build_config: BuildConfig,
build_dir: Path,
image_name: str,
) -> Path:
"""
Generate a run.yaml template file for user to edit from a build.yaml file
"""
apis = list(build_config.distribution_spec.providers.keys())
run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name,
apis=apis,
providers={},
external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR,
)
if not run_config.inference_store:
run_config.inference_store = SqliteSqlStoreConfig(
**SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
)
)
# build providers dict
provider_registry = get_provider_registry(build_config)
for api in apis:
run_config.providers[api] = []
providers = build_config.distribution_spec.providers[api]
for provider in providers:
pid = provider.provider_type.split("::")[-1]
p = provider_registry[Api(api)][provider.provider_type]
if p.deprecation_error:
raise InvalidProviderError(p.deprecation_error)
try:
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
except (ModuleNotFoundError, ValueError) as exc:
# HACK ALERT:
# This code executes after building is done, the import cannot work since the
# package is either available in the venv or container - not available on the host.
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
# external
cprint(
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
color="yellow",
file=sys.stderr,
)
# Set config_type to None to avoid UnboundLocalError
config_type = None
if config_type is not None and hasattr(config_type, "sample_run_config"):
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
else:
config = {}
p_spec = Provider(
provider_id=pid,
provider_type=provider.provider_type,
config=config,
module=provider.module,
)
run_config.providers[api].append(p_spec)
run_config_file = build_dir / f"{image_name}-run.yaml"
with open(run_config_file, "w") as f:
to_write = json.loads(run_config.model_dump_json())
f.write(yaml.dump(to_write, sort_keys=False))
# Only print this message for non-container builds since it will be displayed before the
# container is built
# For non-container builds, the run.yaml is generated at the very end of the build process so it
# makes sense to display this message
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
return run_config_file
def _run_stack_build_command_from_build_config(
build_config: BuildConfig,
image_name: str | None = None,
distro_name: str | None = None,
config_path: str | None = None,
) -> Path | Traversable:
image_name = image_name or build_config.image_name
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
if distro_name:
image_name = f"distribution-{distro_name}"
else:
if not image_name:
raise ValueError("Please specify an image name when building a container image without a template")
else:
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
image_name = "__system__"
if not image_name:
raise ValueError("Please specify an image name when building a venv image")
# At this point, image_name should be guaranteed to be a string
if image_name is None:
raise ValueError("image_name should not be None after validation")
if distro_name:
build_dir = DISTRIBS_BASE_DIR / distro_name
build_file_path = build_dir / f"{distro_name}-build.yaml"
else:
if image_name is None:
raise ValueError("image_name cannot be None")
build_dir = DISTRIBS_BASE_DIR / image_name
build_file_path = build_dir / f"{image_name}-build.yaml"
os.makedirs(build_dir, exist_ok=True)
run_config_file = None
# Generate the run.yaml so it can be included in the container image with the proper entrypoint
# Only do this if we're building a container image and we're not using a template
if build_config.image_type == LlamaStackImageType.CONTAINER.value and not distro_name and config_path:
cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
run_config_file = _generate_run_config(build_config, build_dir, image_name)
with open(build_file_path, "w") as f:
to_write = json.loads(build_config.model_dump_json(exclude_none=True))
f.write(yaml.dump(to_write, sort_keys=False))
# We first install the external APIs so that the build process can use them and discover the
# providers dependencies
if build_config.external_apis_dir:
cprint("Installing external APIs", color="yellow", file=sys.stderr)
external_apis = load_external_apis(build_config)
if external_apis:
# install the external APIs
packages = []
for _, api_spec in external_apis.items():
if api_spec.pip_packages:
packages.extend(api_spec.pip_packages)
cprint(
f"Installing {api_spec.name} with pip packages {api_spec.pip_packages}",
color="yellow",
file=sys.stderr,
)
return_code = run_command(["uv", "pip", "install", *packages])
if return_code != 0:
packages_str = ", ".join(packages)
raise RuntimeError(
f"Failed to install external APIs packages: {packages_str} (return code: {return_code})"
)
return_code = build_image(
build_config,
image_name,
distro_or_config=distro_name or config_path or str(build_file_path),
run_config=run_config_file.as_posix() if run_config_file else None,
)
if return_code != 0:
raise RuntimeError(f"Failed to build image {image_name}")
if distro_name:
# copy run.yaml from distribution to build_dir instead of generating it again
distro_path = importlib.resources.files("llama_stack") / f"distributions/{distro_name}/run.yaml"
run_config_file = build_dir / f"{distro_name}-run.yaml"
with importlib.resources.as_file(distro_path) as path:
shutil.copy(path, run_config_file)
cprint("Build Successful!", color="green", file=sys.stderr)
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
if build_config.image_type == LlamaStackImageType.VENV:
cprint(
"You can run the new Llama Stack distro (after activating "
+ colored(image_name, "cyan")
+ ") via: "
+ colored(f"llama stack run {run_config_file}", "blue"),
color="green",
file=sys.stderr,
)
elif build_config.image_type == LlamaStackImageType.CONTAINER:
cprint(
"You can run the container with: "
+ colored(
f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"
),
color="green",
file=sys.stderr,
)
return distro_path
else:
return _generate_run_config(build_config, build_dir, image_name)
def _run_distro_list_cmd() -> None:
headers = [
"Distribution Name",
# "Providers",
"Description",
]
rows = []
for distro_name, spec in available_distros_specs().items():
rows.append(
[
distro_name,
# json.dumps(spec.distribution_spec.providers, indent=2),
spec.distribution_spec.description,
]
)
print_table(
rows,
headers,
separate_rows=True,
)

View file

@ -0,0 +1,182 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import sys
from pathlib import Path
import yaml
from termcolor import cprint
from llama_stack.cli.stack.utils import ImageType
from llama_stack.core.build import get_provider_dependencies
from llama_stack.core.datatypes import (
BuildConfig,
BuildProvider,
DistributionSpec,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.stack import replace_env_vars
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
logger = get_logger(name=__name__, category="cli")
# These are the dependencies needed by the distribution server.
# `llama-stack` is automatically installed by the installation script.
SERVER_DEPENDENCIES = [
"aiosqlite",
"fastapi",
"fire",
"httpx",
"uvicorn",
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
]
def format_output_deps_only(
normal_deps: list[str],
special_deps: list[str],
external_deps: list[str],
uv: bool = False,
) -> str:
"""Format dependencies as a list."""
lines = []
uv_str = ""
if uv:
uv_str = "uv pip install "
# Quote deps with commas
quoted_normal_deps = [quote_if_needed(dep) for dep in normal_deps]
lines.append(f"{uv_str}{' '.join(quoted_normal_deps)}")
for special_dep in special_deps:
lines.append(f"{uv_str}{quote_special_dep(special_dep)}")
for external_dep in external_deps:
lines.append(f"{uv_str}{quote_special_dep(external_dep)}")
return "\n".join(lines)
def run_stack_list_deps_command(args: argparse.Namespace) -> None:
if args.config:
try:
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
config_file = resolve_config_or_distro(args.config, Mode.BUILD)
except ValueError as e:
cprint(
f"Could not parse config file {args.config}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
if config_file:
with open(config_file) as f:
try:
contents = yaml.safe_load(f)
contents = replace_env_vars(contents)
build_config = BuildConfig(**contents)
build_config.image_type = "venv"
except Exception as e:
cprint(
f"Could not parse config file {config_file}: {e}",
color="red",
file=sys.stderr,
)
sys.exit(1)
elif args.providers:
provider_list: dict[str, list[BuildProvider]] = dict()
for api_provider in args.providers.split(","):
if "=" not in api_provider:
cprint(
"Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
color="red",
file=sys.stderr,
)
sys.exit(1)
api, provider_type = api_provider.split("=")
providers_for_api = get_provider_registry().get(Api(api), None)
if providers_for_api is None:
cprint(
f"{api} is not a valid API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
if provider_type in providers_for_api:
provider = BuildProvider(
provider_type=provider_type,
module=None,
)
provider_list.setdefault(api, []).append(provider)
else:
cprint(
f"{provider_type} is not a valid provider for the {api} API.",
color="red",
file=sys.stderr,
)
sys.exit(1)
distribution_spec = DistributionSpec(
providers=provider_list,
description=",".join(args.providers),
)
build_config = BuildConfig(image_type=ImageType.VENV.value, distribution_spec=distribution_spec)
normal_deps, special_deps, external_provider_dependencies = get_provider_dependencies(build_config)
normal_deps += SERVER_DEPENDENCIES
# Add external API dependencies
if build_config.external_apis_dir:
from llama_stack.core.external import load_external_apis
external_apis = load_external_apis(build_config)
if external_apis:
for _, api_spec in external_apis.items():
normal_deps.extend(api_spec.pip_packages)
# Format and output based on requested format
output = format_output_deps_only(
normal_deps=normal_deps,
special_deps=special_deps,
external_deps=external_provider_dependencies,
uv=args.format == "uv",
)
print(output)
def quote_if_needed(dep):
# Add quotes if the dependency contains special characters that need escaping in shell
# This includes: commas, comparison operators (<, >, <=, >=, ==, !=)
needs_quoting = any(char in dep for char in [",", "<", ">", "="])
return f"'{dep}'" if needs_quoting else dep
def quote_special_dep(dep_string):
"""
Quote individual packages in a special dependency string.
Special deps may contain multiple packages and flags like --extra-index-url.
We need to quote only the package specs that contain special characters.
"""
parts = dep_string.split()
quoted_parts = []
for part in parts:
# Don't quote flags (they start with -)
if part.startswith("-"):
quoted_parts.append(part)
else:
# Quote package specs that need it
quoted_parts.append(quote_if_needed(part))
return " ".join(quoted_parts)
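Expected behavior of the two quoting helpers, using illustrative dependency strings. The import path assumes the new module lands at llama_stack.cli.stack._list_deps, as the relative import in list_deps.py suggests.

from llama_stack.cli.stack._list_deps import quote_if_needed, quote_special_dep

print(quote_if_needed("fastapi"))        # -> fastapi
print(quote_if_needed("torch>=2.1,<3"))  # -> 'torch>=2.1,<3'
print(quote_special_dep("torch>=2.1 --extra-index-url https://example.com/simple"))
# -> 'torch>=2.1' --extra-index-url https://example.com/simple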

View file

@ -1,100 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import textwrap
from llama_stack.cli.stack.utils import ImageType
from llama_stack.cli.subcommand import Subcommand
class StackBuild(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"build",
prog="llama stack build",
description="Build a Llama stack container",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_stack_build_command)
def _add_arguments(self):
self.parser.add_argument(
"--config",
type=str,
default=None,
help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
)
self.parser.add_argument(
"--template",
type=str,
default=None,
help="""(deprecated) Name of the example template config to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
)
self.parser.add_argument(
"--distro",
"--distribution",
dest="distribution",
type=str,
default=None,
help="""Name of the distribution to use for build. You may use `llama stack build --list-distros` to check out the available distributions""",
)
self.parser.add_argument(
"--list-distros",
"--list-distributions",
action="store_true",
dest="list_distros",
default=False,
help="Show the available distributions for building a Llama Stack distribution",
)
self.parser.add_argument(
"--image-type",
type=str,
help="Image Type to use for the build. If not specified, will use the image type from the template config.",
choices=[e.value for e in ImageType],
default=None, # no default so we can detect if a user specified --image-type and override image_type in the config
)
self.parser.add_argument(
"--image-name",
type=str,
help=textwrap.dedent(
f"""[for image-type={"|".join(e.value for e in ImageType)}] Name of the virtual environment to use for
the build. If not specified, currently active environment will be used if found.
"""
),
default=None,
)
self.parser.add_argument(
"--print-deps-only",
default=False,
action="store_true",
help="Print the dependencies for the stack only, without building the stack",
)
self.parser.add_argument(
"--run",
action="store_true",
default=False,
help="Run the stack after building using the same image type, name, and other applicable arguments",
)
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="Build a config for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
)
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
from ._build import run_stack_build_command
return run_stack_build_command(args)

View file

@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from llama_stack.cli.subcommand import Subcommand
class StackListDeps(Subcommand):
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list-deps",
prog="llama stack list-deps",
description="list the dependencies for a llama stack distribution",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._run_stack_list_deps_command)
def _add_arguments(self):
self.parser.add_argument(
"config",
type=str,
nargs="?", # Make it optional
metavar="config | distro",
help="Path to config file to use or name of known distro (llama stack list for a list).",
)
self.parser.add_argument(
"--providers",
type=str,
default=None,
help="sync dependencies for a list of providers and only those providers. This list is formatted like: api1=provider1,api2=provider2. Where there can be multiple providers per API.",
)
self.parser.add_argument(
"--format",
type=str,
choices=["uv", "deps-only"],
default="deps-only",
help="Output format: 'uv' shows shell commands, 'deps-only' shows just the list of dependencies without `uv` (default)",
)
def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
# always keep implementation completely silo-ed away from CLI so CLI
# can be fast to load and reduces dependencies
from ._list_deps import run_stack_list_deps_command
return run_stack_list_deps_command(args)

View file

@ -11,8 +11,8 @@ from llama_stack.cli.stack.list_stacks import StackListBuilds
from llama_stack.cli.stack.utils import print_subcommand_description
from llama_stack.cli.subcommand import Subcommand
from .build import StackBuild
from .list_apis import StackListApis
from .list_deps import StackListDeps
from .list_providers import StackListProviders
from .remove import StackRemove
from .run import StackRun
@ -39,7 +39,7 @@ class StackParser(Subcommand):
subparsers = self.parser.add_subparsers(title="stack_subcommands")
# Add sub-commands
StackBuild.create(subparsers)
StackListDeps.create(subparsers)
StackListApis.create(subparsers)
StackListProviders.create(subparsers)
StackRun.create(subparsers)

View file

@ -4,7 +4,37 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import sys
from enum import Enum
from functools import lru_cache
from pathlib import Path
import yaml
from termcolor import cprint
from llama_stack.core.datatypes import (
BuildConfig,
Provider,
StackRunConfig,
StorageConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.resolver import InvalidProviderError
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
)
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
class ImageType(Enum):
@ -19,3 +49,103 @@ def print_subcommand_description(parser, subparsers):
description = subcommand.description
description_text += f" {name:<21} {description}\n"
parser.epilog = description_text
def generate_run_config(
build_config: BuildConfig,
build_dir: Path,
image_name: str,
) -> Path:
"""
Generate a run.yaml template file for user to edit from a build.yaml file
"""
apis = list(build_config.distribution_spec.providers.keys())
distro_dir = DISTRIBS_BASE_DIR / image_name
run_config = StackRunConfig(
container_image=(image_name if build_config.image_type == LlamaStackImageType.CONTAINER.value else None),
image_name=image_name,
apis=apis,
providers={},
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=str(distro_dir / "kvstore.db")),
"sql_default": SqliteSqlStoreConfig(db_path=str(distro_dir / "sql_store.db")),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
external_providers_dir=build_config.external_providers_dir
if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR,
)
# build providers dict
provider_registry = get_provider_registry(build_config)
for api in apis:
run_config.providers[api] = []
providers = build_config.distribution_spec.providers[api]
for provider in providers:
pid = provider.provider_type.split("::")[-1]
p = provider_registry[Api(api)][provider.provider_type]
if p.deprecation_error:
raise InvalidProviderError(p.deprecation_error)
try:
config_type = instantiate_class_type(provider_registry[Api(api)][provider.provider_type].config_class)
except (ModuleNotFoundError, ValueError) as exc:
# HACK ALERT:
# This code executes after building is done, the import cannot work since the
# package is either available in the venv or container - not available on the host.
# TODO: use a "is_external" flag in ProviderSpec to check if the provider is
# external
cprint(
f"Failed to import provider {provider.provider_type} for API {api} - assuming it's external, skipping: {exc}",
color="yellow",
file=sys.stderr,
)
# Set config_type to None to avoid UnboundLocalError
config_type = None
if config_type is not None and hasattr(config_type, "sample_run_config"):
config = config_type.sample_run_config(__distro_dir__=f"~/.llama/distributions/{image_name}")
else:
config = {}
p_spec = Provider(
provider_id=pid,
provider_type=provider.provider_type,
config=config,
module=provider.module,
)
run_config.providers[api].append(p_spec)
run_config_file = build_dir / f"{image_name}-run.yaml"
with open(run_config_file, "w") as f:
to_write = json.loads(run_config.model_dump_json())
f.write(yaml.dump(to_write, sort_keys=False))
# Only print this message for non-container builds since it will be displayed before the
# container is built
# For non-container builds, the run.yaml is generated at the very end of the build process so it
# makes sense to display this message
if build_config.image_type != LlamaStackImageType.CONTAINER.value:
cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)
return run_config_file
@lru_cache
def available_templates_specs() -> dict[str, BuildConfig]:
import yaml
template_specs = {}
for p in TEMPLATES_PATH.rglob("*build.yaml"):
template_name = p.parent.name
with open(p) as f:
build_config = BuildConfig(**yaml.safe_load(f))
template_specs[template_name] = build_config
return template_specs
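A hedged sketch of using the two helpers above together: list the available build templates, then generate a run.yaml for one of them. "starter" is an assumed distribution name; substitute one printed by the first call.

from pathlib import Path

from llama_stack.cli.stack.utils import available_templates_specs, generate_run_config

specs = available_templates_specs()
print(sorted(specs))  # distribution names that ship a build.yaml

name = "starter"  # assumed name; pick one from the list printed above
if name in specs:
    build_dir = Path("/tmp") / name
    build_dir.mkdir(parents=True, exist_ok=True)  # generate_run_config writes into this directory
    run_yaml = generate_run_config(specs[name], build_dir, image_name=name)
    print(f"run config written to {run_yaml}")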

View file

@ -41,7 +41,7 @@ class AccessRule(BaseModel):
A rule defines a list of actions either to permit or to forbid. It may specify a
principal or a resource that must match for the rule to take effect. The resource
to match should be specified in the form of a type qualified identifier, e.g.
model::my-model or vector_db::some-db, or a wildcard for all resources of a type,
model::my-model or vector_store::some-db, or a wildcard for all resources of a type,
e.g. model::*. If the principal or resource are not specified, they will match all
requests.
@ -79,9 +79,9 @@ class AccessRule(BaseModel):
description: any user has read access to any resource created by a member of their team
- forbid:
actions: [create, read, delete]
resource: vector_db::*
resource: vector_store::*
unless: user with admin in roles
description: only user with admin role can use vector_db resources
description: only user with admin role can use vector_store resources
"""

View file

@ -1,410 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
PYPI_VERSION=${PYPI_VERSION:-}
BUILD_PLATFORM=${BUILD_PLATFORM:-}
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
# mounting is not supported by docker buildx, so we use COPY instead
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
# Path to the run.yaml file in the container
RUN_CONFIG_PATH=/app/run.yaml
BUILD_CONTEXT_DIR=$(pwd)
set -euo pipefail
# Define color codes
RED='\033[0;31m'
NC='\033[0m' # No Color
# Usage function
usage() {
echo "Usage: $0 --image-name <image_name> --container-base <container_base> --normal-deps <pip_dependencies> [--run-config <run_config>] [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
echo "Example: $0 --image-name llama-stack-img --container-base python:3.12-slim --normal-deps 'numpy pandas' --run-config ./run.yaml --external-provider-deps 'foo' --optional-deps 'bar'"
exit 1
}
# Parse arguments
image_name=""
container_base=""
normal_deps=""
external_provider_deps=""
optional_deps=""
run_config=""
distro_or_config=""
while [[ $# -gt 0 ]]; do
key="$1"
case "$key" in
--image-name)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --image-name requires a string value" >&2
usage
fi
image_name="$2"
shift 2
;;
--container-base)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --container-base requires a string value" >&2
usage
fi
container_base="$2"
shift 2
;;
--normal-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --normal-deps requires a string value" >&2
usage
fi
normal_deps="$2"
shift 2
;;
--external-provider-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --external-provider-deps requires a string value" >&2
usage
fi
external_provider_deps="$2"
shift 2
;;
--optional-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --optional-deps requires a string value" >&2
usage
fi
optional_deps="$2"
shift 2
;;
--run-config)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --run-config requires a string value" >&2
usage
fi
run_config="$2"
shift 2
;;
--distro-or-config)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --distro-or-config requires a string value" >&2
usage
fi
distro_or_config="$2"
shift 2
;;
*)
echo "Unknown option: $1" >&2
usage
;;
esac
done
# Check required arguments
if [[ -z "$image_name" || -z "$container_base" || -z "$normal_deps" ]]; then
echo "Error: --image-name, --container-base, and --normal-deps are required." >&2
usage
fi
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain}
TEMP_DIR=$(mktemp -d)
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
source "$SCRIPT_DIR/common.sh"
add_to_container() {
output_file="$TEMP_DIR/Containerfile"
if [ -t 0 ]; then
printf '%s\n' "$1" >>"$output_file"
else
cat >>"$output_file"
fi
}
if ! is_command_available "$CONTAINER_BINARY"; then
printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
exit 1
fi
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
add_to_container << EOF
FROM $container_base
WORKDIR /app
# We install the Python 3.12 dev headers and build tools so that any
# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
RUN dnf -y update && dnf install -y iputils git net-tools wget \
vim-minimal python3.12 python3.12-pip python3.12-wheel \
python3.12-setuptools python3.12-devel gcc gcc-c++ make && \
ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
EOF
else
add_to_container << EOF
FROM $container_base
WORKDIR /app
RUN apt-get update && apt-get install -y \
iputils-ping net-tools iproute2 dnsutils telnet \
curl wget telnet git\
procps psmisc lsof \
traceroute \
bubblewrap \
gcc g++ \
&& rm -rf /var/lib/apt/lists/*
ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
EOF
fi
# Add pip dependencies first since llama-stack is what will change most often
# so we can reuse layers.
if [ -n "$normal_deps" ]; then
read -ra pip_args <<< "$normal_deps"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container << EOF
RUN uv pip install --no-cache $quoted_deps
EOF
fi
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN uv pip install --no-cache $quoted_deps
EOF
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
read -ra pip_args <<< "$part"
quoted_deps=$(printf " %q" "${pip_args[@]}")
add_to_container <<EOF
RUN uv pip install --no-cache $quoted_deps
EOF
add_to_container <<EOF
RUN python3 - <<PYTHON | uv pip install --no-cache -r -
import importlib
import sys
try:
package_name = '$part'.split('==')[0].split('>=')[0].split('<=')[0].split('!=')[0].split('<')[0].split('>')[0]
module = importlib.import_module(f'{package_name}.provider')
spec = module.get_provider_spec()
if hasattr(spec, 'pip_packages') and spec.pip_packages:
if isinstance(spec.pip_packages, (list, tuple)):
print('\n'.join(spec.pip_packages))
except Exception as e:
print(f'Error getting provider spec for {package_name}: {e}', file=sys.stderr)
PYTHON
EOF
done
fi
get_python_cmd() {
if is_command_available python; then
echo "python"
elif is_command_available python3; then
echo "python3"
else
echo "Error: Neither python nor python3 is installed. Please install Python to continue." >&2
exit 1
fi
}
if [ -n "$run_config" ]; then
# Copy the run config to the build context since it's an absolute path
cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
# Parse the run.yaml configuration to identify external provider directories
# If external providers are specified, copy their directory to the container
# and update the configuration to reference the new container path
python_cmd=$(get_python_cmd)
external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
external_providers_dir=$(eval echo "$external_providers_dir")
if [ -n "$external_providers_dir" ]; then
if [ -d "$external_providers_dir" ]; then
echo "Copying external providers directory: $external_providers_dir"
cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
add_to_container << EOF
COPY providers.d /.llama/providers.d
EOF
fi
# Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
if [ "$(uname)" = "Darwin" ]; then
sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
rm -f "$BUILD_CONTEXT_DIR/run.yaml.bak"
else
sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
fi
fi
# Copy run config into docker image
add_to_container << EOF
COPY run.yaml $RUN_CONFIG_PATH
EOF
fi
stack_mount="/app/llama-stack-source"
client_mount="/app/llama-stack-client-source"
install_local_package() {
local dir="$1"
local mount_point="$2"
local name="$3"
if [ ! -d "$dir" ]; then
echo "${RED}Warning: $name is set but directory does not exist: $dir${NC}" >&2
exit 1
fi
if [ "$USE_COPY_NOT_MOUNT" = "true" ]; then
add_to_container << EOF
COPY $dir $mount_point
EOF
fi
add_to_container << EOF
RUN uv pip install --no-cache -e $mount_point
EOF
}
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
install_local_package "$LLAMA_STACK_CLIENT_DIR" "$client_mount" "LLAMA_STACK_CLIENT_DIR"
fi
if [ -n "$LLAMA_STACK_DIR" ]; then
install_local_package "$LLAMA_STACK_DIR" "$stack_mount" "LLAMA_STACK_DIR"
else
if [ -n "$TEST_PYPI_VERSION" ]; then
# these packages are damaged in test-pypi, so install them first
add_to_container << EOF
RUN uv pip install --no-cache fastapi libcst
EOF
add_to_container << EOF
RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
--index-strategy unsafe-best-match \
llama-stack==$TEST_PYPI_VERSION
EOF
else
if [ -n "$PYPI_VERSION" ]; then
SPEC_VERSION="llama-stack==${PYPI_VERSION}"
else
SPEC_VERSION="llama-stack"
fi
add_to_container << EOF
RUN uv pip install --no-cache $SPEC_VERSION
EOF
fi
fi
# remove uv after installation
add_to_container << EOF
RUN pip uninstall -y uv
EOF
# If a run config is provided, we use the llama stack CLI
if [[ -n "$run_config" ]]; then
add_to_container << EOF
ENTRYPOINT ["llama", "stack", "run", "$RUN_CONFIG_PATH"]
EOF
elif [[ "$distro_or_config" != *.yaml ]]; then
add_to_container << EOF
ENTRYPOINT ["llama", "stack", "run", "$distro_or_config"]
EOF
fi
# Add other required commands generic to all containers
add_to_container << EOF
RUN mkdir -p /.llama /.cache && chmod -R g+rw /.llama /.cache && (chmod -R g+rw /app 2>/dev/null || true)
EOF
printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
cat "$TEMP_DIR"/Containerfile
printf "\n"
# Start building the CLI arguments
CLI_ARGS=()
# Read CONTAINER_OPTS and put it in an array
read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
if [ -n "$LLAMA_STACK_DIR" ]; then
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
fi
fi
if is_command_available selinuxenabled && selinuxenabled; then
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
CLI_ARGS+=("--security-opt" "label=disable")
fi
# Set version tag based on PyPI version
if [ -n "$PYPI_VERSION" ]; then
version_tag="$PYPI_VERSION"
elif [ -n "$TEST_PYPI_VERSION" ]; then
version_tag="test-$TEST_PYPI_VERSION"
elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_STACK_CLIENT_DIR" ]]; then
version_tag="dev"
else
URL="https://pypi.org/pypi/llama-stack/json"
version_tag=$(curl -s $URL | jq -r '.info.version')
fi
# Add version tag to image name
image_tag="$image_name:$version_tag"
# Detect platform architecture
ARCH=$(uname -m)
if [ -n "$BUILD_PLATFORM" ]; then
CLI_ARGS+=("--platform" "$BUILD_PLATFORM")
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
CLI_ARGS+=("--platform" "linux/arm64")
elif [ "$ARCH" = "x86_64" ]; then
CLI_ARGS+=("--platform" "linux/amd64")
else
echo "Unsupported architecture: $ARCH"
exit 1
fi
echo "PWD: $(pwd)"
echo "Containerfile: $TEMP_DIR/Containerfile"
set -x
$CONTAINER_BINARY build \
"${CLI_ARGS[@]}" \
-t "$image_tag" \
-f "$TEMP_DIR/Containerfile" \
"$BUILD_CONTEXT_DIR"
# clean up tmp/configs
rm -rf "$BUILD_CONTEXT_DIR/run.yaml" "$TEMP_DIR"
set +x
echo "Success!"

View file

@ -1,220 +0,0 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
LLAMA_STACK_CLIENT_DIR=${LLAMA_STACK_CLIENT_DIR:-}
TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
# This timeout (in seconds) is necessary when installing PyTorch via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
UV_SYSTEM_PYTHON=${UV_SYSTEM_PYTHON:-}
VIRTUAL_ENV=${VIRTUAL_ENV:-}
set -euo pipefail
# Define color codes
RED='\033[0;31m'
NC='\033[0m' # No Color
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
source "$SCRIPT_DIR/common.sh"
# Usage function
usage() {
echo "Usage: $0 --env-name <env_name> --normal-deps <pip_dependencies> [--external-provider-deps <external_provider_deps>] [--optional-deps <special_pip_deps>]"
echo "Example: $0 --env-name mybuild --normal-deps 'numpy pandas scipy' --external-provider-deps 'foo' --optional-deps 'bar'"
exit 1
}
# Parse arguments
env_name=""
normal_deps=""
external_provider_deps=""
optional_deps=""
while [[ $# -gt 0 ]]; do
key="$1"
case "$key" in
--env-name)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --env-name requires a string value" >&2
usage
fi
env_name="$2"
shift 2
;;
--normal-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --normal-deps requires a string value" >&2
usage
fi
normal_deps="$2"
shift 2
;;
--external-provider-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --external-provider-deps requires a string value" >&2
usage
fi
external_provider_deps="$2"
shift 2
;;
--optional-deps)
if [[ -z "$2" || "$2" == --* ]]; then
echo "Error: --optional-deps requires a string value" >&2
usage
fi
optional_deps="$2"
shift 2
;;
*)
echo "Unknown option: $1" >&2
usage
;;
esac
done
# Check required arguments
if [[ -z "$env_name" || -z "$normal_deps" ]]; then
echo "Error: --env-name and --normal-deps are required." >&2
usage
fi
if [ -n "$LLAMA_STACK_DIR" ]; then
echo "Using llama-stack-dir=$LLAMA_STACK_DIR"
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
echo "Using llama-stack-client-dir=$LLAMA_STACK_CLIENT_DIR"
fi
ENVNAME=""
# pre-run checks to make sure we can proceed with the installation
pre_run_checks() {
local env_name="$1"
if ! is_command_available uv; then
echo "uv is not installed, trying to install it."
if ! is_command_available pip; then
echo "pip is not installed, cannot automatically install 'uv'."
echo "Follow this link to install it:"
echo "https://docs.astral.sh/uv/getting-started/installation/"
exit 1
else
pip install uv
fi
fi
# checking if an environment with the same name already exists
if [ -d "$env_name" ]; then
echo "Environment '$env_name' already exists, re-using it."
fi
}
run() {
# Use only global variables set by flag parser
if [ -n "$UV_SYSTEM_PYTHON" ] || [ "$env_name" == "__system__" ]; then
echo "Installing dependencies in system Python environment"
export UV_SYSTEM_PYTHON=1
elif [ "$VIRTUAL_ENV" == "$env_name" ]; then
echo "Virtual environment $env_name is already active"
else
echo "Using virtual environment $env_name"
uv venv "$env_name"
source "$env_name/bin/activate"
fi
if [ -n "$TEST_PYPI_VERSION" ]; then
uv pip install fastapi libcst
uv pip install --extra-index-url https://test.pypi.org/simple/ \
--index-strategy unsafe-best-match \
llama-stack=="$TEST_PYPI_VERSION" \
$normal_deps
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
echo "$part"
uv pip install $part
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
echo "$part"
uv pip install "$part"
done
fi
else
if [ -n "$LLAMA_STACK_DIR" ]; then
# only warn if DIR does not start with "git+"
if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then
printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
exit 1
fi
printf "Installing from LLAMA_STACK_DIR: %s\n" "$LLAMA_STACK_DIR"
# editable only if LLAMA_STACK_DIR does not start with "git+"
if [[ "$LLAMA_STACK_DIR" != git+* ]]; then
EDITABLE="-e"
else
EDITABLE=""
fi
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR"
else
uv pip install --no-cache-dir llama-stack
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
# only warn if DIR does not start with "git+"
if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
exit 1
fi
printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
# editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+"
if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
EDITABLE="-e"
else
EDITABLE=""
fi
uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR"
fi
printf "Installing pip dependencies\n"
uv pip install $normal_deps
if [ -n "$optional_deps" ]; then
IFS='#' read -ra parts <<<"$optional_deps"
for part in "${parts[@]}"; do
echo "Installing special provider module: $part"
uv pip install $part
done
fi
if [ -n "$external_provider_deps" ]; then
IFS='#' read -ra parts <<<"$external_provider_deps"
for part in "${parts[@]}"; do
echo "Installing external provider module: $part"
uv pip install "$part"
echo "Getting provider spec for module: $part and installing dependencies"
package_name=$(echo "$part" | sed 's/[<>=!].*//')
python3 -c "
import importlib
import sys
try:
module = importlib.import_module(f'$package_name.provider')
spec = module.get_provider_spec()
if hasattr(spec, 'pip_packages') and spec.pip_packages:
print('\\n'.join(spec.pip_packages))
except Exception as e:
print(f'Error getting provider spec for $package_name: {e}', file=sys.stderr)
" | uv pip install -r -
done
fi
fi
}
pre_run_checks "$env_name"
run
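
Note on the external-provider step above: the embedded `python3 -c` snippet assumes each external provider package ships a `<package>.provider` module whose `get_provider_spec()` returns a spec object exposing a `pip_packages` list, which the script then pipes into `uv pip install -r -`. A minimal sketch of that contract follows; the `ExampleProviderSpec` dataclass and the package names are illustrative stand-ins for the stack's real provider spec type.

```
# Sketch of the module layout the script expects: my_provider/provider.py
# The dataclass below is a stand-in for the stack's provider spec type;
# only the pip_packages attribute is read by the install script.
from dataclasses import dataclass, field


@dataclass
class ExampleProviderSpec:
    provider_type: str
    pip_packages: list[str] = field(default_factory=list)


def get_provider_spec() -> ExampleProviderSpec:
    # These packages are printed one per line and fed to `uv pip install -r -`.
    return ExampleProviderSpec(
        provider_type="remote::example",
        pip_packages=["httpx", "pydantic>=2"],
    )
```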

View file

@ -159,6 +159,37 @@ def upgrade_from_routing_table(
config_dict["apis"] = config_dict["apis_to_serve"]
config_dict.pop("apis_to_serve", None)
# Add default storage config if not present
if "storage" not in config_dict:
config_dict["storage"] = {
"backends": {
"kv_default": {
"type": "kv_sqlite",
"db_path": "~/.llama/kvstore.db",
},
"sql_default": {
"type": "sql_sqlite",
"db_path": "~/.llama/sql_store.db",
},
},
"stores": {
"metadata": {
"namespace": "registry",
"backend": "kv_default",
},
"inference": {
"table_name": "inference_store",
"backend": "sql_default",
"max_write_queue_size": 10000,
"num_writers": 4,
},
"conversations": {
"table_name": "openai_conversations",
"backend": "sql_default",
},
},
}
return config_dict

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import secrets
import time
from typing import Any
@ -21,16 +20,11 @@ from llama_stack.apis.conversations.conversations import (
Conversations,
Metadata,
)
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.datatypes import AccessRule, StackRunConfig
from llama_stack.log import get_logger
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
from llama_stack.providers.utils.sqlstore.sqlstore import (
SqliteSqlStoreConfig,
SqlStoreConfig,
sqlstore_impl,
)
from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
logger = get_logger(name=__name__, category="openai_conversations")
@ -38,13 +32,11 @@ logger = get_logger(name=__name__, category="openai_conversations")
class ConversationServiceConfig(BaseModel):
"""Configuration for the built-in conversation service.
:param conversations_store: SQL store configuration for conversations (defaults to SQLite)
:param run_config: Stack run configuration for resolving persistence
:param policy: Access control rules
"""
conversations_store: SqlStoreConfig = SqliteSqlStoreConfig(
db_path=(DISTRIBS_BASE_DIR / "conversations.db").as_posix()
)
run_config: StackRunConfig
policy: list[AccessRule] = []
@ -63,14 +55,16 @@ class ConversationServiceImpl(Conversations):
self.deps = deps
self.policy = config.policy
base_sql_store = sqlstore_impl(config.conversations_store)
# Use conversations store reference from run config
conversations_ref = config.run_config.storage.stores.conversations
if not conversations_ref:
raise ValueError("storage.stores.conversations must be configured in run config")
base_sql_store = sqlstore_impl(conversations_ref)
self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
async def initialize(self) -> None:
"""Initialize the store and create tables."""
if isinstance(self.config.conversations_store, SqliteSqlStoreConfig):
os.makedirs(os.path.dirname(self.config.conversations_store.db_path), exist_ok=True)
await self.sql_store.create_table(
"openai_conversations",
{

View file

@ -23,12 +23,15 @@ from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
from llama_stack.apis.shields import Shield, ShieldInput
from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
from llama_stack.core.access_control.datatypes import AccessRule
from llama_stack.core.storage.datatypes import (
KVStoreReference,
StorageBackendType,
StorageConfig,
)
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = 2
LLAMA_STACK_RUN_CONFIG_VERSION = 2
@ -68,7 +71,7 @@ class ShieldWithOwner(Shield, ResourceWithOwner):
pass
class VectorDBWithOwner(VectorDB, ResourceWithOwner):
class VectorStoreWithOwner(VectorStore, ResourceWithOwner):
pass
@ -88,12 +91,12 @@ class ToolGroupWithOwner(ToolGroup, ResourceWithOwner):
pass
RoutableObject = Model | Shield | VectorDB | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObject = Model | Shield | VectorStore | Dataset | ScoringFn | Benchmark | ToolGroup
RoutableObjectWithProvider = Annotated[
ModelWithOwner
| ShieldWithOwner
| VectorDBWithOwner
| VectorStoreWithOwner
| DatasetWithOwner
| ScoringFnWithOwner
| BenchmarkWithOwner
@ -351,12 +354,32 @@ class AuthenticationRequiredError(Exception):
pass
class QualifiedModel(BaseModel):
"""A qualified model identifier, consisting of a provider ID and a model ID."""
provider_id: str
model_id: str
class VectorStoresConfig(BaseModel):
"""Configuration for vector stores in the stack."""
default_provider_id: str | None = Field(
default=None,
description="ID of the vector_io provider to use as default when multiple providers are available and none is specified.",
)
default_embedding_model: QualifiedModel | None = Field(
default=None,
description="Default embedding model configuration for vector stores.",
)
class QuotaPeriod(StrEnum):
DAY = "day"
class QuotaConfig(BaseModel):
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
kvstore: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
authenticated_max_requests: int = Field(
default=1000, description="Max requests for authenticated clients per period"
@ -399,6 +422,18 @@ def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | N
raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}")
class RegisteredResources(BaseModel):
"""Registry of resources available in the distribution."""
models: list[ModelInput] = Field(default_factory=list)
shields: list[ShieldInput] = Field(default_factory=list)
vector_stores: list[VectorStoreInput] = Field(default_factory=list)
datasets: list[DatasetInput] = Field(default_factory=list)
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
tool_groups: list[ToolGroupInput] = Field(default_factory=list)
class ServerConfig(BaseModel):
port: int = Field(
default=8321,
@ -438,18 +473,6 @@ class ServerConfig(BaseModel):
)
class InferenceStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class ResponsesStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for responses store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")
class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@ -476,37 +499,15 @@ One or more providers to use for each API. The same provider_type (e.g., meta-re
can be instantiated multiple times (with different configs) if necessary.
""",
)
metadata_store: KVStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the distribution registry. If not specified,
a default SQLite store will be used.""",
storage: StorageConfig = Field(
description="Catalog of named storage backends and references available to the stack",
)
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the inference API. Can be either a
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
If not specified, a default SQLite store will be used.""",
registered_resources: RegisteredResources = Field(
default_factory=RegisteredResources,
description="Registry of resources available in the distribution",
)
conversations_store: SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the conversations API.
If not specified, a default SQLite store will be used.""",
)
# registry of "resources" in the distribution
models: list[ModelInput] = Field(default_factory=list)
shields: list[ShieldInput] = Field(default_factory=list)
vector_dbs: list[VectorDBInput] = Field(default_factory=list)
datasets: list[DatasetInput] = Field(default_factory=list)
scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
benchmarks: list[BenchmarkInput] = Field(default_factory=list)
tool_groups: list[ToolGroupInput] = Field(default_factory=list)
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
@ -526,6 +527,11 @@ If not specified, a default SQLite store will be used.""",
description="Path to directory containing external API implementations. The APIs code and dependencies must be installed on the system.",
)
vector_stores: VectorStoresConfig | None = Field(
default=None,
description="Configuration for vector stores, including default embedding model",
)
@field_validator("external_providers_dir")
@classmethod
def validate_external_providers_dir(cls, v):
@ -535,6 +541,49 @@ If not specified, a default SQLite store will be used.""",
return Path(v)
return v
@model_validator(mode="after")
def validate_server_stores(self) -> "StackRunConfig":
backend_map = self.storage.backends
stores = self.storage.stores
kv_backends = {
name
for name, cfg in backend_map.items()
if cfg.type
in {
StorageBackendType.KV_REDIS,
StorageBackendType.KV_SQLITE,
StorageBackendType.KV_POSTGRES,
StorageBackendType.KV_MONGODB,
}
}
sql_backends = {
name
for name, cfg in backend_map.items()
if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
}
def _ensure_backend(reference, expected_set, store_name: str) -> None:
if reference is None:
return
backend_name = reference.backend
if backend_name not in backend_map:
raise ValueError(
f"{store_name} references unknown backend '{backend_name}'. "
f"Available backends: {sorted(backend_map)}"
)
if backend_name not in expected_set:
raise ValueError(
f"{store_name} references backend '{backend_name}' of type "
f"'{backend_map[backend_name].type.value}', but a backend of type "
f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
)
_ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
_ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
_ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
return self
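
For a concrete picture of what this validator accepts, the dict below is an illustrative `storage` section in its parsed-YAML form, mirroring the defaults added by the upgrade path earlier in this change: `metadata` references a kv_* backend while the SQL-backed stores reference a sql_* backend. Pointing `metadata` at `sql_default` instead would make `validate_server_stores` raise a ValueError.

```
# Illustrative parsed run-config "storage" section (matches the defaults shown
# in the upgrade path earlier in this change). metadata must reference a kv_*
# backend; inference/conversations/responses must reference a sql_* backend.
storage = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "~/.llama/sql_store.db"},
    },
    "stores": {
        "metadata": {"namespace": "registry", "backend": "kv_default"},
        "inference": {"table_name": "inference_store", "backend": "sql_default"},
        "conversations": {"table_name": "openai_conversations", "backend": "sql_default"},
    },
}
```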
class BuildConfig(BaseModel):
version: int = LLAMA_STACK_BUILD_CONFIG_VERSION

View file

@ -63,6 +63,10 @@ def builtin_automatically_routed_apis() -> list[AutoRoutedApiInfo]:
routing_table_api=Api.tool_groups,
router_api=Api.tool_runtime,
),
AutoRoutedApiInfo(
routing_table_api=Api.vector_stores,
router_api=Api.vector_io,
),
]

View file

@ -47,7 +47,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
from llama_stack.log import get_logger
from llama_stack.log import get_logger, setup_logging
from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
from llama_stack.strong_typing.inspection import is_unwrapped_body_param
@ -200,6 +200,9 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
skip_logger_removal: bool = False,
):
super().__init__()
# Initialize logging from environment variables first
setup_logging()
# when using the library client, we should not log to console since many
# of our logs are intended for server-side usage
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
@ -278,7 +281,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
else:
prefix = "!" if in_notebook() else ""
cprint(
f"Please run:\n\n{prefix}llama stack build --distro {self.config_path_or_distro_name} --image-type venv\n\n",
f"Please run:\n\n{prefix}llama stack list-deps {self.config_path_or_distro_name} | xargs -L1 uv pip install\n\n",
"yellow",
file=sys.stderr,
)

View file

@ -11,9 +11,8 @@ from pydantic import BaseModel
from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
class PromptServiceConfig(BaseModel):
@ -41,10 +40,12 @@ class PromptServiceImpl(Prompts):
self.kvstore: KVStore
async def initialize(self) -> None:
kvstore_config = SqliteKVStoreConfig(
db_path=(DISTRIBS_BASE_DIR / self.config.run_config.image_name / "prompts.db").as_posix()
)
self.kvstore = await kvstore_impl(kvstore_config)
# Use metadata store backend with prompts-specific namespace
metadata_ref = self.config.run_config.storage.stores.metadata
if not metadata_ref:
raise ValueError("storage.stores.metadata must be configured in run config")
prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
self.kvstore = await kvstore_impl(prompts_ref)
def _get_default_key(self, prompt_id: str) -> str:
"""Get the KVStore key that stores the default version number."""

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import importlib
import importlib.metadata
import inspect
from typing import Any
@ -29,6 +30,7 @@ from llama_stack.apis.shields import Shields
from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.core.client import get_client_impl
from llama_stack.core.datatypes import (
@ -80,6 +82,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.inspect: Inspect,
Api.batches: Batches,
Api.vector_io: VectorIO,
Api.vector_stores: VectorStore,
Api.models: Models,
Api.safety: Safety,
Api.shields: Shields,

View file

@ -6,7 +6,10 @@
from typing import Any
from llama_stack.core.datatypes import AccessRule, RoutedProtocol
from llama_stack.core.datatypes import (
AccessRule,
RoutedProtocol,
)
from llama_stack.core.stack import StackRunConfig
from llama_stack.core.store import DistributionRegistry
from llama_stack.providers.datatypes import Api, RoutingTable
@ -26,6 +29,7 @@ async def get_routing_table_impl(
from ..routing_tables.scoring_functions import ScoringFunctionsRoutingTable
from ..routing_tables.shields import ShieldsRoutingTable
from ..routing_tables.toolgroups import ToolGroupsRoutingTable
from ..routing_tables.vector_stores import VectorStoresRoutingTable
api_to_tables = {
"models": ModelsRoutingTable,
@ -34,6 +38,7 @@ async def get_routing_table_impl(
"scoring_functions": ScoringFunctionsRoutingTable,
"benchmarks": BenchmarksRoutingTable,
"tool_groups": ToolGroupsRoutingTable,
"vector_stores": VectorStoresRoutingTable,
}
if api.value not in api_to_tables:
@ -76,14 +81,21 @@ async def get_auto_router_impl(
api_to_dep_impl[dep_name] = deps[dep_api]
# TODO: move config passing into the routers instead
if api == Api.inference and run_config.inference_store:
if api == Api.inference:
inference_ref = run_config.storage.stores.inference
if not inference_ref:
raise ValueError("storage.stores.inference must be configured in run config")
inference_store = InferenceStore(
config=run_config.inference_store,
reference=inference_ref,
policy=policy,
)
await inference_store.initialize()
api_to_dep_impl["store"] = inference_store
elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
await impl.initialize()
return impl

View file

@ -37,24 +37,24 @@ class ToolRuntimeRouter(ToolRuntime):
async def query(
self,
content: InterleavedContent,
vector_db_ids: list[str],
vector_store_ids: list[str],
query_config: RAGQueryConfig | None = None,
) -> RAGQueryResult:
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_db_ids}")
logger.debug(f"ToolRuntimeRouter.RagToolImpl.query: {vector_store_ids}")
provider = await self.routing_table.get_provider_impl("knowledge_search")
return await provider.query(content, vector_db_ids, query_config)
return await provider.query(content, vector_store_ids, query_config)
async def insert(
self,
documents: list[RAGDocument],
vector_db_id: str,
vector_store_id: str,
chunk_size_in_tokens: int = 512,
) -> None:
logger.debug(
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
f"ToolRuntimeRouter.RagToolImpl.insert: {vector_store_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
)
provider = await self.routing_table.get_provider_impl("insert_into_memory")
return await provider.insert(documents, vector_db_id, chunk_size_in_tokens)
return await provider.insert(documents, vector_store_id, chunk_size_in_tokens)
def __init__(
self,

View file

@ -31,6 +31,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import VectorStoresConfig
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
@ -43,9 +44,11 @@ class VectorIORouter(VectorIO):
def __init__(
self,
routing_table: RoutingTable,
vector_stores_config: VectorStoresConfig | None = None,
) -> None:
logger.debug("Initializing VectorIORouter")
self.routing_table = routing_table
self.vector_stores_config = vector_stores_config
async def initialize(self) -> None:
logger.debug("VectorIORouter.initialize")
@ -68,25 +71,6 @@ class VectorIORouter(VectorIO):
raise ValueError(f"Embedding model '{embedding_model_id}' not found or not an embedding model")
async def register_vector_db(
self,
vector_db_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
vector_db_name: str | None = None,
provider_vector_db_id: str | None = None,
) -> None:
logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
await self.routing_table.register_vector_db(
vector_db_id,
embedding_model,
embedding_dimension,
provider_id,
vector_db_name,
provider_vector_db_id,
)
async def insert_chunks(
self,
vector_db_id: str,
@ -122,6 +106,17 @@ class VectorIORouter(VectorIO):
embedding_dimension = extra.get("embedding_dimension")
provider_id = extra.get("provider_id")
# Use default embedding model if not specified
if (
embedding_model is None
and self.vector_stores_config
and self.vector_stores_config.default_embedding_model is not None
):
# Construct the full model ID with provider prefix
embedding_provider_id = self.vector_stores_config.default_embedding_model.provider_id
model_id = self.vector_stores_config.default_embedding_model.model_id
embedding_model = f"{embedding_provider_id}/{model_id}"
if embedding_model is not None and embedding_dimension is None:
embedding_dimension = await self._get_embedding_model_dimension(embedding_model)
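
To illustrate the fallback above: with a `vector_stores` config along these lines (the provider and model IDs are made-up examples, and the import path follows the other imports in this change), a create request that omits an embedding model resolves to the `<provider_id>/<model_id>` form before the dimension lookup runs.

```
# Hypothetical config; provider/model IDs are illustrative, not shipped defaults.
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

vector_stores_config = VectorStoresConfig(
    default_provider_id="faiss",
    default_embedding_model=QualifiedModel(
        provider_id="sentence-transformers",
        model_id="all-MiniLM-L6-v2",
    ),
)
default = vector_stores_config.default_embedding_model
embedding_model = f"{default.provider_id}/{default.model_id}"
# -> "sentence-transformers/all-MiniLM-L6-v2"
```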
@ -132,28 +127,41 @@ class VectorIORouter(VectorIO):
raise ValueError("No vector_io providers available")
if num_providers > 1:
available_providers = list(self.routing_table.impls_by_provider_id.keys())
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
# Use default configured provider
if self.vector_stores_config and self.vector_stores_config.default_provider_id:
default_provider = self.vector_stores_config.default_provider_id
if default_provider in available_providers:
provider_id = default_provider
logger.debug(f"Using configured default vector store provider: {provider_id}")
else:
raise ValueError(
f"Configured default vector store provider '{default_provider}' not found. "
f"Available providers: {available_providers}"
)
else:
raise ValueError(
f"Multiple vector_io providers available. Please specify provider_id in extra_body. "
f"Available providers: {available_providers}"
)
else:
provider_id = list(self.routing_table.impls_by_provider_id.keys())[0]
vector_db_id = f"vs_{uuid.uuid4()}"
registered_vector_db = await self.routing_table.register_vector_db(
vector_db_id=vector_db_id,
vector_store_id = f"vs_{uuid.uuid4()}"
registered_vector_store = await self.routing_table.register_vector_store(
vector_store_id=vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
provider_id=provider_id,
provider_vector_db_id=vector_db_id,
vector_db_name=params.name,
provider_vector_store_id=vector_store_id,
vector_store_name=params.name,
)
provider = await self.routing_table.get_provider_impl(registered_vector_db.identifier)
provider = await self.routing_table.get_provider_impl(registered_vector_store.identifier)
# Update model_extra with registered values so provider uses the already-registered vector_db
# Update model_extra with registered values so provider uses the already-registered vector_store
if params.model_extra is None:
params.model_extra = {}
params.model_extra["provider_vector_db_id"] = registered_vector_db.provider_resource_id
params.model_extra["provider_id"] = registered_vector_db.provider_id
params.model_extra["provider_vector_store_id"] = registered_vector_store.provider_resource_id
params.model_extra["provider_id"] = registered_vector_store.provider_id
if embedding_model is not None:
params.model_extra["embedding_model"] = embedding_model
if embedding_dimension is not None:
@ -171,15 +179,15 @@ class VectorIORouter(VectorIO):
logger.debug(f"VectorIORouter.openai_list_vector_stores: limit={limit}")
# Route to default provider for now - could aggregate from all providers in the future
# call retrieve on each registered vector store to build the list
vector_dbs = await self.routing_table.get_all_with_type("vector_db")
vector_stores = await self.routing_table.get_all_with_type("vector_store")
all_stores = []
for vector_db in vector_dbs:
for vector_store in vector_stores:
try:
provider = await self.routing_table.get_provider_impl(vector_db.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_db.identifier)
provider = await self.routing_table.get_provider_impl(vector_store.identifier)
vector_store = await provider.openai_retrieve_vector_store(vector_store.identifier)
all_stores.append(vector_store)
except Exception as e:
logger.error(f"Error retrieving vector store {vector_db.identifier}: {e}")
logger.error(f"Error retrieving vector store {vector_store.identifier}: {e}")
continue
# Sort by created_at
@ -243,8 +251,7 @@ class VectorIORouter(VectorIO):
vector_store_id: str,
) -> VectorStoreDeleteResponse:
logger.debug(f"VectorIORouter.openai_delete_vector_store: {vector_store_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store(vector_store_id)
return await self.routing_table.openai_delete_vector_store(vector_store_id)
async def openai_search_vector_store(
self,

View file

@ -41,7 +41,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
elif api == Api.safety:
return await p.register_shield(obj)
elif api == Api.vector_io:
return await p.register_vector_db(obj)
return await p.register_vector_store(obj)
elif api == Api.datasetio:
return await p.register_dataset(obj)
elif api == Api.scoring:
@ -57,7 +57,7 @@ async def register_object_with_provider(obj: RoutableObject, p: Any) -> Routable
async def unregister_object_from_provider(obj: RoutableObject, p: Any) -> None:
api = get_impl_api(p)
if api == Api.vector_io:
return await p.unregister_vector_db(obj.identifier)
return await p.unregister_vector_store(obj.identifier)
elif api == Api.inference:
return await p.unregister_model(obj.identifier)
elif api == Api.safety:
@ -108,7 +108,7 @@ class CommonRoutingTableImpl(RoutingTable):
elif api == Api.safety:
p.shield_store = self
elif api == Api.vector_io:
p.vector_db_store = self
p.vector_store_store = self
elif api == Api.datasetio:
p.dataset_store = self
elif api == Api.scoring:
@ -134,12 +134,15 @@ class CommonRoutingTableImpl(RoutingTable):
from .scoring_functions import ScoringFunctionsRoutingTable
from .shields import ShieldsRoutingTable
from .toolgroups import ToolGroupsRoutingTable
from .vector_stores import VectorStoresRoutingTable
def apiname_object():
if isinstance(self, ModelsRoutingTable):
return ("Inference", "model")
elif isinstance(self, ShieldsRoutingTable):
return ("Safety", "shield")
elif isinstance(self, VectorStoresRoutingTable):
return ("VectorIO", "vector_store")
elif isinstance(self, DatasetsRoutingTable):
return ("DatasetIO", "dataset")
elif isinstance(self, ScoringFunctionsRoutingTable):

View file

@ -0,0 +1,292 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
# Removed VectorStores import to avoid exposing public API
from llama_stack.apis.vector_io.vector_io import (
SearchRankingOptions,
VectorStoreChunkingStrategy,
VectorStoreDeleteResponse,
VectorStoreFileContentsResponse,
VectorStoreFileDeleteResponse,
VectorStoreFileObject,
VectorStoreFileStatus,
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.core.datatypes import (
VectorStoreWithOwner,
)
from llama_stack.log import get_logger
from .common import CommonRoutingTableImpl, lookup_model
logger = get_logger(name=__name__, category="core::routing_tables")
class VectorStoresRoutingTable(CommonRoutingTableImpl):
"""Internal routing table for vector_store operations.
Does not inherit from VectorStores to avoid exposing public API endpoints.
Only provides internal routing functionality for VectorIORouter.
"""
# Internal methods only - no public API exposure
async def register_vector_store(
self,
vector_store_id: str,
embedding_model: str,
embedding_dimension: int | None = 384,
provider_id: str | None = None,
provider_vector_store_id: str | None = None,
vector_store_name: str | None = None,
) -> Any:
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
provider_id = list(self.impls_by_provider_id.keys())[0]
if len(self.impls_by_provider_id) > 1:
logger.warning(
f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
)
else:
raise ValueError("No provider available. Please configure a vector_io provider.")
model = await lookup_model(self, embedding_model)
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
vector_store = VectorStoreWithOwner(
identifier=vector_store_id,
type=ResourceType.vector_store.value,
provider_id=provider_id,
provider_resource_id=provider_vector_store_id,
embedding_model=embedding_model,
embedding_dimension=embedding_dimension,
vector_store_name=vector_store_name,
)
await self.register_object(vector_store)
return vector_store
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
) -> VectorStoreObject:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store(vector_store_id)
async def openai_update_vector_store(
self,
vector_store_id: str,
name: str | None = None,
expires_after: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
name=name,
expires_after=expires_after,
metadata=metadata,
)
async def openai_delete_vector_store(
self,
vector_store_id: str,
) -> VectorStoreDeleteResponse:
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
result = await provider.openai_delete_vector_store(vector_store_id)
await self.unregister_vector_store(vector_store_id)
return result
async def unregister_vector_store(self, vector_store_id: str) -> None:
"""Remove the vector store from the routing table registry."""
try:
vector_store_obj = await self.get_object_by_identifier("vector_store", vector_store_id)
if vector_store_obj:
await self.unregister_object(vector_store_obj)
except Exception as e:
# Log the error but don't fail the operation
logger.warning(f"Failed to unregister vector store {vector_store_id} from routing table: {e}")
async def openai_search_vector_store(
self,
vector_store_id: str,
query: str | list[str],
filters: dict[str, Any] | None = None,
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_search_vector_store(
vector_store_id=vector_store_id,
query=query,
filters=filters,
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
search_mode=search_mode,
)
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any] | None = None,
chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_attach_file_to_vector_store(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
limit: int | None = 20,
order: str | None = "desc",
after: str | None = None,
before: str | None = None,
filter: VectorStoreFileStatus | None = None,
) -> list[VectorStoreFileObject]:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store(
vector_store_id=vector_store_id,
limit=limit,
order=order,
after=after,
before=before,
filter=filter,
)
async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileObject:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileContentsResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_update_vector_store_file(
self,
vector_store_id: str,
file_id: str,
attributes: dict[str, Any],
) -> VectorStoreFileObject:
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
attributes=attributes,
)
async def openai_delete_vector_store_file(
self,
vector_store_id: str,
file_id: str,
) -> VectorStoreFileDeleteResponse:
await self.assert_action_allowed("delete", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_delete_vector_store_file(
vector_store_id=vector_store_id,
file_id=file_id,
)
async def openai_create_vector_store_file_batch(
self,
vector_store_id: str,
file_ids: list[str],
attributes: dict[str, Any] | None = None,
chunking_strategy: Any | None = None,
):
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_create_vector_store_file_batch(
vector_store_id=vector_store_id,
file_ids=file_ids,
attributes=attributes,
chunking_strategy=chunking_strategy,
)
async def openai_retrieve_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)
async def openai_list_files_in_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
after: str | None = None,
before: str | None = None,
filter: str | None = None,
limit: int | None = 20,
order: str | None = "desc",
):
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_list_files_in_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
after=after,
before=before,
filter=filter,
limit=limit,
order=order,
)
async def openai_cancel_vector_store_file_batch(
self,
batch_id: str,
vector_store_id: str,
):
await self.assert_action_allowed("update", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_cancel_vector_store_file_batch(
batch_id=batch_id,
vector_store_id=vector_store_id,
)

View file

@ -72,13 +72,30 @@ class AuthProvider(ABC):
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> dict[str, list[str]]:
attributes: dict[str, list[str]] = {}
for claim_key, attribute_key in mapping.items():
if claim_key not in claims:
# First try dot notation for nested traversal (e.g., "resource_access.llamastack.roles")
# Then fall back to literal key with dots (e.g., "my.dotted.key")
claim: object = claims
keys = claim_key.split(".")
for key in keys:
if isinstance(claim, dict) and key in claim:
claim = claim[key]
else:
claim = None
break
if claim is None and claim_key in claims:
# Fall back to checking if claim_key exists as a literal key
claim = claims[claim_key]
if claim is None:
continue
claim = claims[claim_key]
if isinstance(claim, list):
values = claim
else:
elif isinstance(claim, str):
values = claim.split()
else:
continue
if attribute_key in attributes:
attributes[attribute_key].extend(values)
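
As a concrete illustration of the dot-notation traversal and its fallbacks (the claim values below are made up):

```
# Illustrative input/output for the claim-mapping logic above.
claims = {
    "sub": "user-123",
    "resource_access": {"llamastack": {"roles": ["admin", "reader"]}},
    "my.dotted.key": "teams/app",
}
mapping = {
    "resource_access.llamastack.roles": "roles",  # dot notation traverses nested dicts
    "my.dotted.key": "namespaces",                # falls back to the literal dotted key
    "sub": "users",                               # plain string claims are split on whitespace
}
# Expected result of get_attributes_from_claims(claims, mapping):
# {"roles": ["admin", "reader"], "namespaces": ["teams/app"], "users": ["user-123"]}
```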

View file

@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta
from starlette.types import ASGIApp, Receive, Scope, Send
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
logger = get_logger(name=__name__, category="core::server")
@ -33,7 +33,7 @@ class QuotaMiddleware:
def __init__(
self,
app: ASGIApp,
kv_config: KVStoreConfig,
kv_config: KVStoreReference,
anonymous_max_requests: int,
authenticated_max_requests: int,
window_seconds: int = 86400,
@ -45,15 +45,15 @@ class QuotaMiddleware:
self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds
if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
async def _get_kv(self) -> KVStore:
if self.kv is None:
self.kv = await kvstore_impl(self.kv_config)
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
return self.kv
async def __call__(self, scope: Scope, receive: Receive, send: Send):

View file

@ -56,7 +56,7 @@ from llama_stack.core.stack import (
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.log import get_logger
from llama_stack.log import get_logger, setup_logging
from llama_stack.providers.datatypes import Api
from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
@ -374,6 +374,9 @@ def create_app() -> StackApp:
Returns:
Configured StackApp instance.
"""
# Initialize logging from environment variables first
setup_logging()
config_file = os.getenv("LLAMA_STACK_CONFIG")
if config_file is None:
raise ValueError("LLAMA_STACK_CONFIG environment variable is required")

View file

@ -35,13 +35,23 @@ from llama_stack.apis.telemetry import Telemetry
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
from llama_stack.core.datatypes import Provider, StackRunConfig
from llama_stack.core.datatypes import Provider, StackRunConfig, VectorStoresConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
from llama_stack.core.resolver import ProviderRegistry, resolve_impls
from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
ServerStoresConfig,
SqliteKVStoreConfig,
SqliteSqlStoreConfig,
SqlStoreReference,
StorageBackendConfig,
StorageConfig,
)
from llama_stack.core.store.registry import create_dist_registry
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger
@ -98,33 +108,9 @@ REGISTRY_REFRESH_TASK = None
TEST_RECORDING_CONTEXT = None
async def validate_default_embedding_model(impls: dict[Api, Any]):
"""Validate that at most one embedding model is marked as default."""
if Api.models not in impls:
return
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = response.data if hasattr(response, "data") else response
default_embedding_models = []
for model in models_list:
if model.model_type == "embedding" and model.metadata.get("default_configured") is True:
default_embedding_models.append(model.identifier)
if len(default_embedding_models) > 1:
raise ValueError(
f"Multiple embedding models marked as default_configured=True: {default_embedding_models}. "
"Only one embedding model can be marked as default."
)
if default_embedding_models:
logger.info(f"Default embedding model configured: {default_embedding_models[0]}")
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
for rsrc, api, register_method, list_method in RESOURCES:
objects = getattr(run_config, rsrc)
objects = getattr(run_config.registered_resources, rsrc)
if api not in impls:
continue
@ -152,7 +138,41 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
f"{rsrc.capitalize()}: {obj.identifier} served by {obj.provider_id}",
)
await validate_default_embedding_model(impls)
async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
"""Validate vector stores configuration."""
if vector_stores_config is None:
return
default_embedding_model = vector_stores_config.default_embedding_model
if default_embedding_model is None:
return
provider_id = default_embedding_model.provider_id
model_id = default_embedding_model.model_id
default_model_id = f"{provider_id}/{model_id}"
if Api.models not in impls:
raise ValueError(f"Models API is not available but vector_stores config requires model '{default_model_id}'")
models_impl = impls[Api.models]
response = await models_impl.list_models()
models_list = {m.identifier: m for m in response.data if m.model_type == "embedding"}
default_model = models_list.get(default_model_id)
if default_model is None:
raise ValueError(f"Embedding model '{default_model_id}' not found. Available embedding models: {models_list}")
embedding_dimension = default_model.metadata.get("embedding_dimension")
if embedding_dimension is None:
raise ValueError(f"Embedding model '{default_model_id}' is missing 'embedding_dimension' in metadata")
try:
int(embedding_dimension)
except ValueError as err:
raise ValueError(f"Embedding dimension '{embedding_dimension}' cannot be converted to an integer") from err
logger.debug(f"Validated default embedding model: {default_model_id} (dimension: {embedding_dimension})")
class EnvVarError(Exception):
@ -329,6 +349,25 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
impls[Api.conversations] = conversations_impl
def _initialize_storage(run_config: StackRunConfig):
kv_backends: dict[str, StorageBackendConfig] = {}
sql_backends: dict[str, StorageBackendConfig] = {}
for backend_name, backend_config in run_config.storage.backends.items():
type = backend_config.type.value
if type.startswith("kv_"):
kv_backends[backend_name] = backend_config
elif type.startswith("sql_"):
sql_backends[backend_name] = backend_config
else:
raise ValueError(f"Unknown storage backend type: {type}")
from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
register_kvstore_backends(kv_backends)
register_sqlstore_backends(sql_backends)
class Stack:
def __init__(self, run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None):
self.run_config = run_config
@ -347,7 +386,11 @@ class Stack:
TEST_RECORDING_CONTEXT.__enter__()
logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")
dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
_initialize_storage(self.run_config)
stores = self.run_config.storage.stores
if not stores.metadata:
raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
internal_impls = {}
@ -367,8 +410,8 @@ class Stack:
await impls[Api.conversations].initialize()
await register_resources(self.run_config, impls)
await refresh_registry_once(impls)
await validate_vector_stores_config(self.run_config.vector_stores, impls)
self.impls = impls
def create_registry_refresh_task(self):
@ -488,5 +531,16 @@ def run_config_from_adhoc_config_spec(
image_name="distro-test",
apis=list(provider_configs_by_api.keys()),
providers=provider_configs_by_api,
storage=StorageConfig(
backends={
"kv_default": SqliteKVStoreConfig(db_path=f"{distro_dir}/kvstore.db"),
"sql_default": SqliteSqlStoreConfig(db_path=f"{distro_dir}/sql_store.db"),
},
stores=ServerStoresConfig(
metadata=KVStoreReference(backend="kv_default", namespace="registry"),
inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
),
),
)
return config

View file

@ -3,5 +3,3 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .vector_dbs import *

View file

@ -0,0 +1,283 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
from abc import abstractmethod
from enum import StrEnum
from pathlib import Path
from typing import Annotated, Literal
from pydantic import BaseModel, Field, field_validator
class StorageBackendType(StrEnum):
KV_REDIS = "kv_redis"
KV_SQLITE = "kv_sqlite"
KV_POSTGRES = "kv_postgres"
KV_MONGODB = "kv_mongodb"
SQL_SQLITE = "sql_sqlite"
SQL_POSTGRES = "sql_postgres"
class CommonConfig(BaseModel):
namespace: str | None = Field(
default=None,
description="All keys will be prefixed with this namespace",
)
class RedisKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_REDIS] = StorageBackendType.KV_REDIS
host: str = "localhost"
port: int = 6379
@property
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": StorageBackendType.KV_REDIS.value,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
class SqliteKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_SQLITE] = StorageBackendType.KV_SQLITE
db_path: str = Field(
description="File path for the sqlite database",
)
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": StorageBackendType.KV_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
class PostgresKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
ssl_mode: str | None = None
ca_cert_path: str | None = None
table_name: str = "llamastack_kvstore"
@classmethod
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": StorageBackendType.KV_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
"table_name": "${env.POSTGRES_TABLE_NAME:=" + table_name + "}",
}
@field_validator("table_name")
@classmethod
def validate_table_name(cls, v: str) -> str:
# PostgreSQL identifiers rules:
# - Must start with a letter or underscore
# - Can contain letters, numbers, and underscores
# - Maximum length is 63 bytes
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
if not re.match(pattern, v):
raise ValueError(
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
)
if len(v) > 63:
raise ValueError("Table name must be less than 63 characters")
return v
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
class MongoDBKVStoreConfig(CommonConfig):
type: Literal[StorageBackendType.KV_MONGODB] = StorageBackendType.KV_MONGODB
host: str = "localhost"
port: int = 27017
db: str = "llamastack"
user: str | None = None
password: str | None = None
collection_name: str = "llamastack_kvstore"
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": StorageBackendType.KV_MONGODB.value,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
"user": "${env.MONGODB_USER}",
"password": "${env.MONGODB_PASSWORD}",
"collection_name": "${env.MONGODB_COLLECTION_NAME:=" + collection_name + "}",
}
class SqlAlchemySqlStoreConfig(BaseModel):
@property
@abstractmethod
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
db_path: str = Field(
description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
)
@property
def engine_str(self) -> str:
return "sqlite+aiosqlite:///" + Path(self.db_path).expanduser().as_posix()
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return {
"type": StorageBackendType.SQL_SQLITE.value,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
host: str = "localhost"
port: int | str = 5432
db: str = "llamastack"
user: str
password: str | None = None
@property
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return {
"type": StorageBackendType.SQL_POSTGRES.value,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
# reference = (backend_name, table_name)
class SqlStoreReference(BaseModel):
"""A reference to a 'SQL-like' persistent store. A table name must be provided."""
table_name: str = Field(
description="Name of the table to use for the SqlStore",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
# reference = (backend_name, namespace)
class KVStoreReference(BaseModel):
"""A reference to a 'key-value' persistent store. A namespace must be provided."""
namespace: str = Field(
description="Key prefix for KVStore backends",
)
backend: str = Field(
description="Name of backend from storage.backends",
)
StorageBackendConfig = Annotated[
RedisKVStoreConfig
| SqliteKVStoreConfig
| PostgresKVStoreConfig
| MongoDBKVStoreConfig
| SqliteSqlStoreConfig
| PostgresSqlStoreConfig,
Field(discriminator="type"),
]
class InferenceStoreReference(SqlStoreReference):
"""Inference store configuration with queue tuning."""
max_write_queue_size: int = Field(
default=10000,
description="Max queued writes for inference store",
)
num_writers: int = Field(
default=4,
description="Number of concurrent background writers",
)
class ResponsesStoreReference(InferenceStoreReference):
"""Responses store configuration with queue tuning."""
class ServerStoresConfig(BaseModel):
metadata: KVStoreReference | None = Field(
default=None,
description="Metadata store configuration (uses KV backend)",
)
inference: InferenceStoreReference | None = Field(
default=None,
description="Inference store configuration (uses SQL backend)",
)
conversations: SqlStoreReference | None = Field(
default=None,
description="Conversations store configuration (uses SQL backend)",
)
responses: ResponsesStoreReference | None = Field(
default=None,
description="Responses store configuration (uses SQL backend)",
)
class StorageConfig(BaseModel):
backends: dict[str, StorageBackendConfig] = Field(
description="Named backend configurations (e.g., 'default', 'cache')",
)
stores: ServerStoresConfig = Field(
default_factory=lambda: ServerStoresConfig(),
description="Named references to storage backends used by the stack core",
)
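
Putting these datatypes together, a minimal sketch of wiring a storage configuration in Python, assuming the classes are importable from `llama_stack.core.storage.datatypes` as the imports elsewhere in this change suggest; the paths and table names are illustrative:

```
# Sketch only: backends are named, stores reference a backend by name.
from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    ResponsesStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

storage = StorageConfig(
    backends={
        "kv_default": SqliteKVStoreConfig(db_path="~/.llama/kvstore.db"),
        "sql_default": SqliteSqlStoreConfig(db_path="~/.llama/sql_store.db"),
    },
    stores=ServerStoresConfig(
        metadata=KVStoreReference(backend="kv_default", namespace="registry"),
        inference=InferenceStoreReference(
            backend="sql_default", table_name="inference_store", num_writers=4
        ),
        responses=ResponsesStoreReference(backend="sql_default", table_name="responses_store"),
        conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
    ),
)
```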

View file

@ -11,10 +11,9 @@ from typing import Protocol
import pydantic
from llama_stack.core.datatypes import RoutableObjectWithProvider
from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core::registry")
@ -191,16 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
async def create_dist_registry(
metadata_store: KVStoreConfig | None,
image_name: str,
metadata_store: KVStoreReference, image_name: str
) -> tuple[CachedDiskDistributionRegistry, KVStore]:
# instantiate kvstore for storing and retrieving distribution metadata
if metadata_store:
dist_kvstore = await kvstore_impl(metadata_store)
else:
dist_kvstore = await kvstore_impl(
SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
)
dist_kvstore = await kvstore_impl(metadata_store)
dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
await dist_registry.initialize()
return dist_registry, dist_kvstore

View file

@ -9,7 +9,7 @@
1. Start up the Llama Stack API server. More details [here](https://llamastack.github.io/latest/getting_started/index.html).
```
llama stack build --distro together --image-type venv
llama stack list-deps together | xargs -L1 uv pip install
llama stack run together
```

View file

@ -32,7 +32,7 @@ def tool_chat_page():
tool_groups_list = [tool_group.identifier for tool_group in tool_groups]
mcp_tools_list = [tool for tool in tool_groups_list if tool.startswith("mcp::")]
builtin_tools_list = [tool for tool in tool_groups_list if not tool.startswith("mcp::")]
selected_vector_dbs = []
selected_vector_stores = []
def reset_agent():
st.session_state.clear()
@ -55,13 +55,13 @@ def tool_chat_page():
)
if "builtin::rag" in toolgroup_selection:
vector_dbs = llama_stack_api.client.vector_dbs.list() or []
if not vector_dbs:
vector_stores = llama_stack_api.client.vector_stores.list() or []
if not vector_stores:
st.info("No vector databases available for selection.")
vector_dbs = [vector_db.identifier for vector_db in vector_dbs]
selected_vector_dbs = st.multiselect(
vector_stores = [vector_store.identifier for vector_store in vector_stores]
selected_vector_stores = st.multiselect(
label="Select Document Collections to use in RAG queries",
options=vector_dbs,
options=vector_stores,
on_change=reset_agent,
)
@ -119,7 +119,7 @@ def tool_chat_page():
tool_dict = dict(
name="builtin::rag",
args={
"vector_db_ids": list(selected_vector_dbs),
"vector_store_ids": list(selected_vector_stores),
},
)
toolgroup_selection[i] = tool_dict

View file

@ -42,25 +42,25 @@ def resolve_config_or_distro(
# Strategy 1: Try as file path first
config_path = Path(config_or_distro)
if config_path.exists() and config_path.is_file():
logger.info(f"Using file path: {config_path}")
logger.debug(f"Using file path: {config_path}")
return config_path.resolve()
# Strategy 2: Try as distribution name (if no .yaml extension)
if not config_or_distro.endswith(".yaml"):
distro_config = _get_distro_config_path(config_or_distro, mode)
if distro_config.exists():
logger.info(f"Using distribution: {distro_config}")
logger.debug(f"Using distribution: {distro_config}")
return distro_config
# Strategy 3: Try as built distribution name
distrib_config = DISTRIBS_BASE_DIR / f"llamastack-{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}")
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config
distrib_config = DISTRIBS_BASE_DIR / f"{config_or_distro}" / f"{config_or_distro}-{mode}.yaml"
if distrib_config.exists():
logger.info(f"Using built distribution: {distrib_config}")
logger.debug(f"Using built distribution: {distrib_config}")
return distrib_config
# Strategy 4: Failed - provide helpful error
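For reference, a typical call resolves a distribution name or YAML path through the strategies above; the module path and the `"run"` mode value are assumptions for illustration only.

```python
from pathlib import Path

from llama_stack.core.utils.config_resolution import resolve_config_or_distro  # path assumed

# Tries, in order: an explicit YAML file, a packaged distribution, then a built
# distribution under DISTRIBS_BASE_DIR, now logging each hit at debug level.
config_path: Path = resolve_config_or_distro("starter", mode="run")
print(f"Resolved run config: {config_path}")
```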

View file

@ -25,6 +25,8 @@ distribution_spec:
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:

View file

@ -93,30 +93,30 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec_registry.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/milvus_registry.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests/}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/pgvector_registry.db
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/files_metadata.db
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
@ -163,21 +181,21 @@ providers:
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -207,35 +225,52 @@ providers:
provider_type: inline::reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/batches.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/conversations.db
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@ -157,7 +157,7 @@ docker run \
Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
```bash
llama stack build --distro {{ name }} --image-type conda
llama stack list-deps {{ name }} | xargs -L1 pip install
INFERENCE_MODEL=$INFERENCE_MODEL \
DEH_URL=$DEH_URL \
CHROMA_URL=$CHROMA_URL \

View file

@ -26,9 +26,9 @@ providers:
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -38,32 +38,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -86,40 +89,52 @@ providers:
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi1
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi1
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell/}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -34,32 +34,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -82,35 +85,47 @@ providers:
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -70,10 +70,10 @@ docker run \
### Via venv
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
Make sure you have the Llama Stack CLI available.
```bash
llama stack build --distro {{ name }} --image-type venv
llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
llama stack run distributions/{{ name }}/run.yaml \
--port 8321

View file

@ -37,9 +37,9 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -49,32 +49,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -99,40 +102,52 @@ providers:
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -27,9 +27,9 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -39,32 +39,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -89,35 +92,47 @@ providers:
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -126,11 +126,11 @@ docker run \
### Via venv
If you've set up your local development environment, you can also build the image using your local virtual environment.
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
```bash
INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
llama stack build --distro nvidia --image-type venv
llama stack list-deps nvidia | xargs -L1 uv pip install
NVIDIA_API_KEY=$NVIDIA_API_KEY \
INFERENCE_MODEL=$INFERENCE_MODEL \
llama stack run ./run.yaml \

View file

@ -28,9 +28,9 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: nvidia
provider_type: remote::nvidia
@ -41,12 +41,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: nvidia
provider_type: remote::nvidia
@ -65,8 +68,8 @@ providers:
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
- provider_id: nvidia
provider_type: remote::nvidia
config:
@ -86,36 +89,48 @@ providers:
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: nvidia
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: nvidia
model_type: llm
shields:
- shield_id: ${env.SAFETY_MODEL}
provider_id: nvidia
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: nvidia
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: nvidia
model_type: llm
shields:
- shield_id: ${env.SAFETY_MODEL}
provider_id: nvidia
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -23,9 +23,9 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: nvidia
provider_type: remote::nvidia
@ -36,12 +36,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: nvidia
provider_type: remote::nvidia
@ -75,26 +78,38 @@ providers:
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/files_metadata.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/conversations.db
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -39,16 +39,16 @@ providers:
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec_registry.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
provider_type: remote::pgvector
config:
@ -57,9 +57,9 @@ providers:
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/pgvector_registry.db
persistence:
namespace: vector_io::pgvector
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -69,32 +69,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -119,118 +122,130 @@ providers:
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/conversations.db
models:
- metadata: {}
model_id: gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: claude-3-5-sonnet-latest
provider_id: anthropic
provider_model_id: claude-3-5-sonnet-latest
model_type: llm
- metadata: {}
model_id: gemini/gemini-1.5-flash
provider_id: gemini
provider_model_id: gemini/gemini-1.5-flash
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets:
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/simpleqa?split=train
metadata: {}
dataset_id: simpleqa
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
metadata: {}
dataset_id: mmlu_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/math_500?split=test
metadata: {}
dataset_id: math_500
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/IfEval?split=train
metadata: {}
dataset_id: ifeval
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/docvqa?split=val
metadata: {}
dataset_id: docvqa
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
scoring_functions:
- llm-as-judge::405b-simpleqa
metadata: {}
benchmark_id: meta-reference-simpleqa
- dataset_id: mmlu_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-mmlu-cot
- dataset_id: gpqa_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa-cot
- dataset_id: math_500
scoring_functions:
- basic::regex_parser_math_response
metadata: {}
benchmark_id: meta-reference-math-500
- dataset_id: ifeval
scoring_functions:
- basic::ifeval
metadata: {}
benchmark_id: meta-reference-ifeval
- dataset_id: docvqa
scoring_functions:
- basic::docvqa
metadata: {}
benchmark_id: meta-reference-docvqa
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: claude-3-5-sonnet-latest
provider_id: anthropic
provider_model_id: claude-3-5-sonnet-latest
model_type: llm
- metadata: {}
model_id: gemini/gemini-1.5-flash
provider_id: gemini
provider_model_id: gemini/gemini-1.5-flash
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets:
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/simpleqa?split=train
metadata: {}
dataset_id: simpleqa
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
metadata: {}
dataset_id: mmlu_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/math_500?split=test
metadata: {}
dataset_id: math_500
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/IfEval?split=train
metadata: {}
dataset_id: ifeval
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/docvqa?split=val
metadata: {}
dataset_id: docvqa
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
scoring_functions:
- llm-as-judge::405b-simpleqa
metadata: {}
benchmark_id: meta-reference-simpleqa
- dataset_id: mmlu_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-mmlu-cot
- dataset_id: gpqa_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa-cot
- dataset_id: math_500
scoring_functions:
- basic::regex_parser_math_response
metadata: {}
benchmark_id: meta-reference-math-500
- dataset_id: ifeval
scoring_functions:
- basic::ifeval
metadata: {}
benchmark_id: meta-reference-ifeval
- dataset_id: docvqa
scoring_functions:
- basic::docvqa
metadata: {}
benchmark_id: meta-reference-docvqa
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -91,7 +91,6 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 768,
},
)
postgres_config = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -105,22 +104,16 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": inference_providers + [embedding_provider],
"vector_io": vector_io_providers,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
metadata_store=PostgresKVStoreConfig.sample_run_config(),
inference_store=postgres_config,
storage_backends={
"kv_default": PostgresKVStoreConfig.sample_run_config(
table_name="llamastack_kvstore",
),
"sql_default": PostgresSqlStoreConfig.sample_run_config(),
},
),
},
run_config_env_vars={
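The postgres-demo template now supplies Postgres once, as the distribution's named storage backends, instead of repeating it in per-provider `persistence_store`/`responses_store` blocks. A sketch of that fragment with its imports; the import path for `PostgresKVStoreConfig` is assumed.

```python
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig  # path assumed
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig

storage_backends = {
    "kv_default": PostgresKVStoreConfig.sample_run_config(
        table_name="llamastack_kvstore",
    ),
    "sql_default": PostgresSqlStoreConfig.sample_run_config(),
}
# Passed as RunConfigSettings(..., storage_backends=storage_backends); the default
# metadata/inference/conversations store references then resolve against these backends.
```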

View file

@ -22,9 +22,9 @@ providers:
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -34,20 +34,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
@ -63,45 +58,57 @@ providers:
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
metadata_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
inference_store:
type: postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/postgres-demo}/conversations.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
storage:
backends:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:

View file

@ -93,30 +93,30 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec_registry.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/milvus_registry.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu/}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/pgvector_registry.db
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/files_metadata.db
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
@ -166,21 +184,21 @@ providers:
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -210,35 +228,52 @@ providers:
provider_type: inline::reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/batches.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/conversations.db
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@ -26,6 +26,8 @@ distribution_spec:
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:

View file

@ -93,30 +93,30 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec_registry.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter/}/chroma_remote_registry.db
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
@ -125,17 +125,32 @@ providers:
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/pgvector_registry.db
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -147,12 +162,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
@ -163,21 +181,21 @@ providers:
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -207,35 +225,52 @@ providers:
provider_type: inline::reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/batches.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/conversations.db
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5

View file

@ -11,8 +11,10 @@ from llama_stack.core.datatypes import (
BuildProvider,
Provider,
ProviderSpec,
QualifiedModel,
ShieldInput,
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
@ -31,6 +33,8 @@ from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOC
from llama_stack.providers.remote.vector_io.pgvector.config import (
PGVectorVectorIOConfig,
)
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
@ -113,6 +117,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
BuildProvider(provider_type="inline::milvus"),
BuildProvider(provider_type="remote::chromadb"),
BuildProvider(provider_type="remote::pgvector"),
BuildProvider(provider_type="remote::qdrant"),
BuildProvider(provider_type="remote::weaviate"),
],
"files": [BuildProvider(provider_type="inline::localfs")],
"safety": [
@ -221,12 +227,35 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
password="${env.PGVECTOR_PASSWORD:=}",
),
),
Provider(
provider_id="${env.QDRANT_URL:+qdrant}",
provider_type="remote::qdrant",
config=QdrantVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
url="${env.QDRANT_URL:=}",
),
),
Provider(
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
provider_type="remote::weaviate",
config=WeaviateVectorIOConfig.sample_run_config(
f"~/.llama/distributions/{name}",
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
),
),
],
"files": [files_provider],
},
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
vector_stores_config=VectorStoresConfig(
default_provider_id="faiss",
default_embedding_model=QualifiedModel(
provider_id="sentence-transformers",
model_id="nomic-ai/nomic-embed-text-v1.5",
),
),
),
},
run_config_env_vars={
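The starter template also wires a default vector store provider and embedding model through the new `VectorStoresConfig`, which surfaces as the `vector_stores` block at the end of the generated run.yaml files. A minimal sketch of that object:

```python
from llama_stack.core.datatypes import QualifiedModel, VectorStoresConfig

vector_stores_config = VectorStoresConfig(
    default_provider_id="faiss",
    default_embedding_model=QualifiedModel(
        provider_id="sentence-transformers",
        model_id="nomic-ai/nomic-embed-text-v1.5",
    ),
)
# Handed to RunConfigSettings(vector_stores_config=vector_stores_config, ...) so the
# distribution ships with a default vector store and embedding model.
```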

View file

@ -27,8 +27,15 @@ from llama_stack.core.datatypes import (
ShieldInput,
TelemetryConfig,
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
SqlStoreReference,
StorageBackendType,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
@ -180,10 +187,10 @@ class RunConfigSettings(BaseModel):
default_tool_groups: list[ToolGroupInput] | None = None
default_datasets: list[DatasetInput] | None = None
default_benchmarks: list[BenchmarkInput] | None = None
metadata_store: dict | None = None
inference_store: dict | None = None
conversations_store: dict | None = None
vector_stores_config: VectorStoresConfig | None = None
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
storage_backends: dict[str, Any] | None = None
storage_stores: dict[str, Any] | None = None
def run_config(
self,
@ -226,41 +233,65 @@ class RunConfigSettings(BaseModel):
# Get unique set of APIs from providers
apis = sorted(providers.keys())
storage_backends = self.storage_backends or {
"kv_default": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="kvstore.db",
),
"sql_default": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="sql_store.db",
),
}
storage_stores = self.storage_stores or {
"metadata": KVStoreReference(
backend="kv_default",
namespace="registry",
).model_dump(exclude_none=True),
"inference": InferenceStoreReference(
backend="sql_default",
table_name="inference_store",
).model_dump(exclude_none=True),
"conversations": SqlStoreReference(
backend="sql_default",
table_name="openai_conversations",
).model_dump(exclude_none=True),
}
storage_config = dict(
backends=storage_backends,
stores=storage_stores,
)
# Return a dict that matches StackRunConfig structure
return {
config = {
"version": LLAMA_STACK_RUN_CONFIG_VERSION,
"image_name": name,
"container_image": container_image,
"apis": apis,
"providers": provider_configs,
"metadata_store": self.metadata_store
or SqliteKVStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="registry.db",
),
"inference_store": self.inference_store
or SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="inference_store.db",
),
"conversations_store": self.conversations_store
or SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="conversations.db",
),
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
"vector_dbs": [],
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
"scoring_fns": [],
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
"storage": storage_config,
"registered_resources": {
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
"vector_dbs": [],
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
"scoring_fns": [],
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
},
"server": {
"port": 8321,
},
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
}
if self.vector_stores_config:
config["vector_stores"] = self.vector_stores_config.model_dump(exclude_none=True)
return config
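For orientation, here is a minimal sketch of the `storage` section that `run_config()` now emits when no overrides are supplied. The backend types and store names mirror the defaults above; the `<name>` path segment is a placeholder, not a literal value from any generated file:

```python
# Default storage layout assembled by RunConfigSettings.run_config() (sketch, SQLite defaults assumed).
default_storage = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/distributions/<name>/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "~/.llama/distributions/<name>/sql_store.db"},
    },
    "stores": {
        "metadata": {"backend": "kv_default", "namespace": "registry"},
        "inference": {"backend": "sql_default", "table_name": "inference_store"},
        "conversations": {"backend": "sql_default", "table_name": "openai_conversations"},
    },
}
```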
class DistributionTemplate(BaseModel):
"""
@ -297,11 +328,15 @@ class DistributionTemplate(BaseModel):
# We should have a better way to do this by formalizing the concept of "internal" APIs
# and providers, with a way to specify dependencies for them.
if run_config_.get("inference_store"):
additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
if run_config_.get("metadata_store"):
additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
storage_cfg = run_config_.get("storage", {})
for backend_cfg in storage_cfg.get("backends", {}).values():
store_type = backend_cfg.get("type")
if not store_type:
continue
if str(store_type).startswith("kv_"):
additional_pip_packages.extend(get_kv_pip_packages(backend_cfg))
elif str(store_type).startswith("sql_"):
additional_pip_packages.extend(get_sql_pip_packages(backend_cfg))
if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages)
@ -387,11 +422,13 @@ class DistributionTemplate(BaseModel):
def enum_representer(dumper, data):
return dumper.represent_scalar("tag:yaml.org,2002:str", data.value)
# Register YAML representer for ModelType
# Register YAML representer for enums
yaml.add_representer(ModelType, enum_representer)
yaml.add_representer(DatasetPurpose, enum_representer)
yaml.add_representer(StorageBackendType, enum_representer)
yaml.SafeDumper.add_representer(ModelType, enum_representer)
yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
yaml.SafeDumper.add_representer(StorageBackendType, enum_representer)
for output_dir in [yaml_output_dir, doc_output_dir]:
output_dir.mkdir(parents=True, exist_ok=True)

View file

@ -22,9 +22,9 @@ providers:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@ -34,32 +34,35 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
responses_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
@ -90,28 +93,40 @@ providers:
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
inference_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
conversations_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/conversations.db
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:

View file

@ -166,14 +166,26 @@ class CustomFileHandler(logging.FileHandler):
super().emit(record)
def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None:
def setup_logging(category_levels: dict[str, int] | None = None, log_file: str | None = None) -> None:
"""
Configure logging based on the provided category log levels and an optional log file.
If category_levels or log_file are not provided, they will be read from environment variables.
Parameters:
category_levels (Dict[str, int]): A dictionary mapping categories to their log levels.
log_file (str): Path to a log file to additionally pipe the logs into
category_levels (Dict[str, int] | None): A dictionary mapping categories to their log levels.
If None, reads from LLAMA_STACK_LOGGING environment variable and uses defaults.
log_file (str | None): Path to a log file to additionally pipe the logs into.
If None, reads from LLAMA_STACK_LOG_FILE environment variable.
"""
# Read from environment variables if not explicitly provided
if category_levels is None:
category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
if env_config:
category_levels.update(parse_environment_config(env_config))
if log_file is None:
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
class CategoryFilter(logging.Filter):
@ -224,12 +236,30 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
}
},
"loggers": {
category: {
"handlers": list(handlers.keys()), # Apply all handlers
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
"propagate": False, # Disable propagation to root logger
}
for category in CATEGORIES
**{
category: {
"handlers": list(handlers.keys()), # Apply all handlers
"level": category_levels.get(category, DEFAULT_LOG_LEVEL),
"propagate": False, # Disable propagation to root logger
}
for category in CATEGORIES
},
# Explicitly configure uvicorn loggers to preserve their INFO level
"uvicorn": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
"uvicorn.error": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
"uvicorn.access": {
"handlers": list(handlers.keys()),
"level": logging.INFO,
"propagate": False,
},
},
"root": {
"handlers": list(handlers.keys()),
@ -238,9 +268,13 @@ def setup_logging(category_levels: dict[str, int], log_file: str | None) -> None
}
dictConfig(logging_config)
# Ensure third-party libraries follow the root log level
for _, logger in logging.root.manager.loggerDict.items():
# Ensure third-party libraries follow the root log level, but preserve
# already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
for name, logger in logging.root.manager.loggerDict.items():
if isinstance(logger, logging.Logger):
# Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
if name.startswith(("uvicorn", "fastapi", "llama_stack")):
continue
logger.setLevel(root_level)
@ -278,12 +312,3 @@ def get_logger(
log_level = _category_levels.get("root", DEFAULT_LOG_LEVEL)
logger.setLevel(log_level)
return logging.LoggerAdapter(logger, {"category": category})
env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
if env_config:
_category_levels.update(parse_environment_config(env_config))
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
setup_logging(_category_levels, log_file)
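With the fallback added above, `setup_logging()` can now be called with no arguments. A usage sketch follows; the import path, category name, and file path are assumptions for illustration only:

```python
import logging
import os

from llama_stack.log import setup_logging  # assumed import path for the function above

# Option 1: let the environment drive configuration.
os.environ["LLAMA_STACK_LOG_FILE"] = "/tmp/llama_stack.log"  # illustrative log file
setup_logging()  # reads LLAMA_STACK_LOGGING and LLAMA_STACK_LOG_FILE

# Option 2: pass explicit overrides; omitted arguments still fall back to the environment/defaults.
setup_logging(category_levels={"core": logging.DEBUG}, log_file=None)  # "core" is an assumed category name
```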

View file

@ -17,7 +17,7 @@ from llama_stack.apis.models import Model
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.apis.shields import Shield
from llama_stack.apis.tools import ToolGroup
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.schema_utils import json_schema_type
@ -68,10 +68,10 @@ class ShieldsProtocolPrivate(Protocol):
async def unregister_shield(self, identifier: str) -> None: ...
class VectorDBsProtocolPrivate(Protocol):
async def register_vector_db(self, vector_db: VectorDB) -> None: ...
class VectorStoresProtocolPrivate(Protocol):
async def register_vector_store(self, vector_store: VectorStore) -> None: ...
async def unregister_vector_db(self, vector_db_id: str) -> None: ...
async def unregister_vector_store(self, vector_store_id: str) -> None: ...
class DatasetsProtocolPrivate(Protocol):

View file

@ -83,8 +83,8 @@ class MetaReferenceAgentsImpl(Agents):
self.policy = policy
async def initialize(self) -> None:
self.persistence_store = await kvstore_impl(self.config.persistence_store)
self.responses_store = ResponsesStore(self.config.responses_store, self.policy)
self.persistence_store = await kvstore_impl(self.config.persistence.agent_state)
self.responses_store = ResponsesStore(self.config.persistence.responses, self.policy)
await self.responses_store.initialize()
self.openai_responses_impl = OpenAIResponsesImpl(
inference_api=self.inference_api,

View file

@ -8,24 +8,30 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
class AgentPersistenceConfig(BaseModel):
"""Nested persistence configuration for agents."""
agent_state: KVStoreReference
responses: ResponsesStoreReference
class MetaReferenceAgentsImplConfig(BaseModel):
persistence_store: KVStoreConfig
responses_store: SqlStoreConfig
persistence: AgentPersistenceConfig
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"persistence_store": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="agents_store.db",
),
"responses_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="responses_store.db",
),
"persistence": {
"agent_state": KVStoreReference(
backend="kv_default",
namespace="agents",
).model_dump(exclude_none=True),
"responses": ResponsesStoreReference(
backend="sql_default",
table_name="responses",
).model_dump(exclude_none=True),
}
}
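A quick way to see the new nested shape round-trip (a sketch; assumes the `MetaReferenceAgentsImplConfig` class above is importable and pydantic v2's `model_validate` is available, as used elsewhere in this codebase):

```python
# Build the sample config and read back the nested persistence references.
sample = MetaReferenceAgentsImplConfig.sample_run_config(__distro_dir__="~/.llama/distributions/starter")
cfg = MetaReferenceAgentsImplConfig.model_validate(sample)
print(cfg.persistence.agent_state.namespace)  # "agents"
print(cfg.persistence.responses.table_name)   # "responses"
```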

View file

@ -359,6 +359,7 @@ class OpenAIResponsesImpl:
tool_executor=self.tool_executor,
safety_api=self.safety_api,
guardrail_ids=guardrail_ids,
instructions=instructions,
)
# Stream the response

View file

@ -110,6 +110,7 @@ class StreamingResponseOrchestrator:
text: OpenAIResponseText,
max_infer_iters: int,
tool_executor, # Will be the tool execution logic from the main class
instructions: str,
safety_api,
guardrail_ids: list[str] | None = None,
):
@ -133,6 +134,8 @@ class StreamingResponseOrchestrator:
self.accumulated_usage: OpenAIResponseUsage | None = None
# Track if we've sent a refusal response
self.violation_detected = False
# System message inserted into the model's context
self.instructions = instructions
async def _create_refusal_response(self, violation_message: str) -> OpenAIResponseObjectStream:
"""Create a refusal response to replace streaming content."""
@ -176,6 +179,7 @@ class StreamingResponseOrchestrator:
tools=self.ctx.available_tools(),
error=error,
usage=self.accumulated_usage,
instructions=self.instructions,
)
async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:

View file

@ -6,13 +6,13 @@
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference
class ReferenceBatchesImplConfig(BaseModel):
"""Configuration for the Reference Batches implementation."""
kvstore: KVStoreConfig = Field(
kvstore: KVStoreReference = Field(
description="Configuration for the key-value store backend.",
)
@ -33,8 +33,8 @@ class ReferenceBatchesImplConfig(BaseModel):
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="batches.db",
),
"kvstore": KVStoreReference(
backend="kv_default",
namespace="batches",
).model_dump(exclude_none=True),
}

View file

@ -7,20 +7,17 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
class LocalFSDatasetIOConfig(BaseModel):
kvstore: KVStoreConfig
kvstore: KVStoreReference
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="localfs_datasetio.db",
)
"kvstore": KVStoreReference(
backend="kv_default",
namespace="datasetio::localfs",
).model_dump(exclude_none=True)
}

View file

@ -7,20 +7,17 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
class MetaReferenceEvalConfig(BaseModel):
kvstore: KVStoreConfig
kvstore: KVStoreReference
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="meta_reference_eval.db",
)
"kvstore": KVStoreReference(
backend="kv_default",
namespace="eval",
).model_dump(exclude_none=True)
}

View file

@ -8,14 +8,14 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.core.storage.datatypes import SqlStoreReference
class LocalfsFilesImplConfig(BaseModel):
storage_dir: str = Field(
description="Directory to store uploaded files",
)
metadata_store: SqlStoreConfig = Field(
metadata_store: SqlStoreReference = Field(
description="SQL store configuration for file metadata",
)
ttl_secs: int = 365 * 24 * 60 * 60 # 1 year
@ -24,8 +24,8 @@ class LocalfsFilesImplConfig(BaseModel):
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"storage_dir": "${env.FILES_STORAGE_DIR:=" + __distro_dir__ + "/files}",
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="files_metadata.db",
),
"metadata_store": SqlStoreReference(
backend="sql_default",
table_name="files_metadata",
).model_dump(exclude_none=True),
}

View file

@ -59,7 +59,6 @@ class SentenceTransformersInferenceImpl(
provider_id=self.__provider_id__,
metadata={
"embedding_dimension": 768,
"default_configured": True,
},
model_type=ModelType.embedding,
),

View file

@ -1,75 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from datetime import UTC, datetime
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace.export import SpanProcessor
from opentelemetry.trace.status import StatusCode
from llama_stack.log import get_logger
logger = get_logger(name="console_span_processor", category="telemetry")
class ConsoleSpanProcessor(SpanProcessor):
def __init__(self, print_attributes: bool = False):
self.print_attributes = print_attributes
def on_start(self, span: ReadableSpan, parent_context=None) -> None:
if span.attributes and span.attributes.get("__autotraced__"):
return
timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]")
def on_end(self, span: ReadableSpan) -> None:
timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]"
if span.status.status_code == StatusCode.ERROR:
span_context += " [bold red][ERROR][/bold red]"
elif span.status.status_code != StatusCode.UNSET:
span_context += f" [{span.status.status_code}]"
duration_ms = (span.end_time - span.start_time) / 1e6
span_context += f" ({duration_ms:.2f}ms)"
logger.info(span_context)
if self.print_attributes and span.attributes:
for key, value in span.attributes.items():
if key.startswith("__"):
continue
str_value = str(value)
if len(str_value) > 1000:
str_value = str_value[:997] + "..."
logger.info(f" [dim]{key}[/dim]: {str_value}")
for event in span.events:
event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
severity = event.attributes.get("severity", "info")
message = event.attributes.get("message", event.name)
if isinstance(message, dict) or isinstance(message, list):
message = json.dumps(message, indent=2)
severity_color = {
"error": "red",
"warn": "yellow",
"info": "white",
"debug": "dim",
}.get(severity, "white")
logger.info(f" {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}")
if event.attributes:
for key, value in event.attributes.items():
if key.startswith("__") or key in ["message", "severity"]:
continue
logger.info(f"[dim]{key}[/dim]: {value}")
def shutdown(self) -> None:
"""Shutdown the processor."""
pass
def force_flush(self, timeout_millis: float | None = None) -> bool:
"""Force flush any pending spans."""
return True

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
from typing import Any
@ -60,33 +61,38 @@ class TelemetryAdapter(Telemetry):
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
# Since the library client can be recreated multiple times in a notebook,
# the kernel will hold on to the span processor and cause duplicate spans to be written.
if _TRACER_PROVIDER is None:
provider = TracerProvider()
trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if _TRACER_PROVIDER is None:
provider = TracerProvider()
trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider
# Use single OTLP endpoint for all telemetry signals
# Use single OTLP endpoint for all telemetry signals
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
self.is_otel_endpoint_set = True
else:
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
self.is_otel_endpoint_set = False
self.meter = metrics.get_meter(__name__)
self._lock = _global_lock
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
trace.get_tracer_provider().force_flush()
if self.is_otel_endpoint_set:
trace.get_tracer_provider().force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):

View file

@ -12,15 +12,8 @@ from .config import ChromaVectorIOConfig
async def get_provider_impl(config: ChromaVectorIOConfig, deps: dict[Api, Any]):
from llama_stack.providers.remote.vector_io.chroma.chroma import (
ChromaVectorIOAdapter,
)
from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaVectorIOAdapter
impl = ChromaVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -8,14 +8,14 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class ChromaVectorIOConfig(BaseModel):
db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
persistence: KVStoreReference = Field(description="Config for KV store backend")
@classmethod
def sample_run_config(
@ -23,8 +23,8 @@ class ChromaVectorIOConfig(BaseModel):
) -> dict[str, Any]:
return {
"db_path": db_path,
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="chroma_inline_registry.db",
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::chroma",
).model_dump(exclude_none=True),
}

View file

@ -16,11 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = FaissVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = FaissVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -8,22 +8,19 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class FaissVectorIOConfig(BaseModel):
kvstore: KVStoreConfig
persistence: KVStoreReference
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="faiss_store.db",
)
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::faiss",
).model_dump(exclude_none=True)
}

View file

@ -17,34 +17,21 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.models import Models
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import (
HealthResponse,
HealthStatus,
VectorDBsProtocolPrivate,
)
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
)
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import FaissVectorIOConfig
logger = get_logger(name=__name__, category="vector_io")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:{VERSION}::"
FAISS_INDEX_PREFIX = f"faiss_index:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:{VERSION}::"
@ -155,12 +142,7 @@ class FaissIndex(EmbeddingIndex):
await self._save_index()
async def query_vector(
self,
embedding: NDArray,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
chunks = []
scores = []
@ -175,12 +157,7 @@ class FaissIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores)
async def query_keyword(
self,
query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
raise NotImplementedError(
"Keyword search is not supported - underlying DB FAISS does not support this search mode"
)
@ -199,35 +176,28 @@ class FaissIndex(EmbeddingIndex):
)
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
def __init__(
self,
config: FaissVectorIOConfig,
inference_api: Inference,
models_api: Models,
files_api: Files | None,
) -> None:
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.cache: dict[str, VectorStoreWithIndex] = {}
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)
self.kvstore = await kvstore_impl(self.config.persistence)
# Load existing banks from kvstore
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -252,45 +222,33 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
except Exception as e:
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
async def register_vector_db(
self,
vector_db: VectorDB,
) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
assert self.kvstore is not None
key = f"{VECTOR_DBS_PREFIX}{vector_db.identifier}"
await self.kvstore.set(
key=key,
value=vector_db.model_dump_json(),
)
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
# Store in cache
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db=vector_db,
index=await FaissIndex.create(vector_db.embedding_dimension, self.kvstore, vector_db.identifier),
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store=vector_store,
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
inference_api=self.inference_api,
)
async def list_vector_dbs(self) -> list[VectorDB]:
return [i.vector_db for i in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [i.vector_store for i in self.cache.values()]
async def unregister_vector_db(self, vector_db_id: str) -> None:
async def unregister_vector_store(self, vector_store_id: str) -> None:
assert self.kvstore is not None
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_db_id}")
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
async def insert_chunks(
self,
vector_db_id: str,
chunks: list[Chunk],
ttl_seconds: int | None = None,
) -> None:
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = self.cache.get(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found. found: {self.cache.keys()}")
@ -298,10 +256,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
await index.insert_chunks(chunks)
async def query_chunks(
self,
vector_db_id: str,
query: InterleavedContent,
params: dict[str, Any] | None = None,
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = self.cache.get(vector_db_id)
if index is None:

View file

@ -14,11 +14,6 @@ from .config import MilvusVectorIOConfig
async def get_provider_impl(config: MilvusVectorIOConfig, deps: dict[Api, Any]):
from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter
impl = MilvusVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -8,25 +8,22 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class MilvusVectorIOConfig(BaseModel):
db_path: str
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db",
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="milvus_registry.db",
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::milvus",
).model_dump(exclude_none=True),
}

View file

@ -15,11 +15,6 @@ async def get_provider_impl(config: QdrantVectorIOConfig, deps: dict[Api, Any]):
from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter
assert isinstance(config, QdrantVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = QdrantVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = QdrantVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -9,23 +9,21 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class QdrantVectorIOConfig(BaseModel):
path: str
kvstore: KVStoreConfig
persistence: KVStoreReference
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"path": "${env.QDRANT_PATH:=~/.llama/" + __distro_dir__ + "}/" + "qdrant.db",
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__, db_name="qdrant_registry.db"
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::qdrant",
).model_dump(exclude_none=True),
}

View file

@ -15,11 +15,6 @@ async def get_provider_impl(config: SQLiteVectorIOConfig, deps: dict[Api, Any]):
from .sqlite_vec import SQLiteVecVectorIOAdapter
assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = SQLiteVecVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = SQLiteVecVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -8,22 +8,19 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
class SQLiteVectorIOConfig(BaseModel):
db_path: str = Field(description="Path to the SQLite database file")
kvstore: KVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
persistence: KVStoreReference = Field(description="Config for KV store backend (SQLite only for now)")
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
return {
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + "sqlite_vec.db",
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="sqlite_vec_registry.db",
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::sqlite_vec",
).model_dump(exclude_none=True),
}

View file

@ -17,15 +17,10 @@ from numpy.typing import NDArray
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.models import Models
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
@ -33,7 +28,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_RRF,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
@ -46,7 +41,7 @@ HYBRID_SEARCH = "hybrid"
SEARCH_MODES = {VECTOR_SEARCH, KEYWORD_SEARCH, HYBRID_SEARCH}
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:sqlite_vec:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:sqlite_vec:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:sqlite_vec:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:sqlite_vec:{VERSION}::"
@ -175,32 +170,18 @@ class SQLiteVecIndex(EmbeddingIndex):
# Insert vector embeddings
embedding_data = [
(
(
chunk.chunk_id,
serialize_vector(emb.tolist()),
)
)
((chunk.chunk_id, serialize_vector(emb.tolist())))
for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
]
cur.executemany(
f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);",
embedding_data,
)
cur.executemany(f"INSERT INTO [{self.vector_table}] (id, embedding) VALUES (?, ?);", embedding_data)
# Insert FTS content
fts_data = [(chunk.chunk_id, chunk.content) for chunk in batch_chunks]
# DELETE existing entries with same IDs (FTS5 doesn't support ON CONFLICT)
cur.executemany(
f"DELETE FROM [{self.fts_table}] WHERE id = ?;",
[(row[0],) for row in fts_data],
)
cur.executemany(f"DELETE FROM [{self.fts_table}] WHERE id = ?;", [(row[0],) for row in fts_data])
# INSERT new entries
cur.executemany(
f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);",
fts_data,
)
cur.executemany(f"INSERT INTO [{self.fts_table}] (id, content) VALUES (?, ?);", fts_data)
connection.commit()
@ -216,12 +197,7 @@ class SQLiteVecIndex(EmbeddingIndex):
# Run batch insertion in a background thread
await asyncio.to_thread(_execute_all_batch_inserts)
async def query_vector(
self,
embedding: NDArray,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
"""
Performs vector-based search using a virtual table for vector similarity.
"""
@ -261,12 +237,7 @@ class SQLiteVecIndex(EmbeddingIndex):
scores.append(score)
return QueryChunksResponse(chunks=chunks, scores=scores)
async def query_keyword(
self,
query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
"""
Performs keyword-based search using SQLite FTS5 for relevance-ranked full-text search.
"""
@ -403,41 +374,32 @@ class SQLiteVecIndex(EmbeddingIndex):
await asyncio.to_thread(_delete_chunks)
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
"""
A VectorIO implementation using SQLite + sqlite_vec.
This class handles vector database registration (with metadata stored in a table named `vector_dbs`)
and creates a cache of VectorDBWithIndex instances (each wrapping a SQLiteVecIndex).
This class handles vector database registration (with metadata stored in a table named `vector_stores`)
and creates a cache of VectorStoreWithIndex instances (each wrapping a SQLiteVecIndex).
"""
def __init__(
self,
config,
inference_api: Inference,
models_api: Models,
files_api: Files | None,
) -> None:
def __init__(self, config, inference_api: Inference, files_api: Files | None) -> None:
super().__init__(files_api=files_api, kvstore=None)
self.config = config
self.inference_api = inference_api
self.models_api = models_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.vector_db_store = None
self.cache: dict[str, VectorStoreWithIndex] = {}
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(db_json)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for db_json in stored_vector_stores:
vector_store = VectorStore.model_validate_json(db_json)
index = await SQLiteVecIndex.create(
vector_db.embedding_dimension,
self.config.db_path,
vector_db.identifier,
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
# Load existing OpenAI vector stores into the in-memory cache
await self.initialize_openai_vector_stores()
@ -446,67 +408,64 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def list_vector_dbs(self) -> list[VectorDB]:
return [v.vector_db for v in self.cache.values()]
async def list_vector_stores(self) -> list[VectorStore]:
return [v.vector_store for v in self.cache.values()]
async def register_vector_db(self, vector_db: VectorDB) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
index = await SQLiteVecIndex.create(
vector_db.embedding_dimension,
self.config.db_path,
vector_db.identifier,
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
)
self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)
self.cache[vector_store.identifier] = VectorStoreWithIndex(vector_store, index, self.inference_api)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index = VectorStoreWithIndex(
vector_store=vector_store,
index=SQLiteVecIndex(
dimension=vector_db.embedding_dimension,
dimension=vector_store.embedding_dimension,
db_path=self.config.db_path,
bank_id=vector_db.identifier,
bank_id=vector_store.identifier,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
logger.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
# The VectorDBWithIndex helper is expected to compute embeddings via the inference_api
# The VectorStoreWithIndex helper is expected to compute embeddings via the inference_api
# and then call our index's add_chunks.
await index.insert_chunks(chunks)
async def query_chunks(
self, vector_db_id: str, query: Any, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a sqlite_vec index."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)

View file

@ -7,20 +7,17 @@ from typing import Any
from pydantic import BaseModel
from llama_stack.providers.utils.kvstore.config import (
KVStoreConfig,
SqliteKVStoreConfig,
)
from llama_stack.core.storage.datatypes import KVStoreReference
class HuggingfaceDatasetIOConfig(BaseModel):
kvstore: KVStoreConfig
kvstore: KVStoreReference
@classmethod
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
return {
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="huggingface_datasetio.db",
)
"kvstore": KVStoreReference(
backend="kv_default",
namespace="datasetio::huggingface",
).model_dump(exclude_none=True)
}

View file

@ -20,7 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client

View file

@ -8,7 +8,7 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.core.storage.datatypes import SqlStoreReference
class S3FilesImplConfig(BaseModel):
@ -24,7 +24,7 @@ class S3FilesImplConfig(BaseModel):
auto_create_bucket: bool = Field(
default=False, description="Automatically create the S3 bucket if it doesn't exist"
)
metadata_store: SqlStoreConfig = Field(description="SQL store configuration for file metadata")
metadata_store: SqlStoreReference = Field(description="SQL store configuration for file metadata")
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
@ -35,8 +35,8 @@ class S3FilesImplConfig(BaseModel):
"aws_secret_access_key": "${env.AWS_SECRET_ACCESS_KEY:=}",
"endpoint_url": "${env.S3_ENDPOINT_URL:=}",
"auto_create_bucket": "${env.S3_AUTO_CREATE_BUCKET:=false}",
"metadata_store": SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="s3_files_metadata.db",
),
"metadata_store": SqlStoreReference(
backend="sql_default",
table_name="s3_files_metadata",
).model_dump(exclude_none=True),
}

View file

@ -18,7 +18,7 @@ This provider enables running inference using NVIDIA NIM.
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client
@ -45,7 +45,7 @@ The following example shows how to create a chat completion for an NVIDIA NIM.
```python
response = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct",
model="nvidia/meta/llama-3.1-8b-instruct",
messages=[
{
"role": "system",
@ -67,37 +67,40 @@ print(f"Response: {response.choices[0].message.content}")
The following example shows how to do tool calling for an NVIDIA NIM.
```python
from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
tool_definition = ToolDefinition(
tool_name="get_weather",
description="Get current weather information for a location",
parameters={
"location": ToolParamDefinition(
param_type="string",
description="The city and state, e.g. San Francisco, CA",
required=True,
),
"unit": ToolParamDefinition(
param_type="string",
description="Temperature unit (celsius or fahrenheit)",
required=False,
default="celsius",
),
tool_definition = {
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather information for a location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {
"type": "string",
"description": "Temperature unit (celsius or fahrenheit)",
"default": "celsius",
},
},
"required": ["location"],
},
},
)
}
tool_response = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct",
model="nvidia/meta/llama-3.1-8b-instruct",
messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
tools=[tool_definition],
)
print(f"Tool Response: {tool_response.choices[0].message.content}")
print(f"Response content: {tool_response.choices[0].message.content}")
if tool_response.choices[0].message.tool_calls:
for tool_call in tool_response.choices[0].message.tool_calls:
print(f"Tool Called: {tool_call.tool_name}")
print(f"Arguments: {tool_call.arguments}")
print(f"Tool Called: {tool_call.function.name}")
print(f"Arguments: {tool_call.function.arguments}")
```
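To close the loop after a tool call, here is a sketch of sending the tool's result back for a final answer. The weather payload is fabricated for illustration, and appending `tool_response.choices[0].message` directly assumes the client accepts OpenAI-style message objects in `messages`:

```python
import json

if tool_response.choices[0].message.tool_calls:
    tool_call = tool_response.choices[0].message.tool_calls[0]
    args = json.loads(tool_call.function.arguments)

    followup = client.chat.completions.create(
        model="nvidia/meta/llama-3.1-8b-instruct",
        messages=[
            {"role": "user", "content": "What's the weather like in San Francisco?"},
            tool_response.choices[0].message,  # assistant turn that requested the tool call
            {
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps({"location": args["location"], "temperature": 18, "unit": "celsius"}),
            },
        ],
        tools=[tool_definition],
    )
    print(f"Final answer: {followup.choices[0].message.content}")
```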
### Structured Output Example
@ -105,33 +108,26 @@ if tool_response.choices[0].message.tool_calls:
The following example shows how to do structured output for an NVIDIA NIM.
```python
from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
person_schema = {
"type": "object",
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
"age": {"type": "number"},
"occupation": {"type": "string"},
},
"required": ["name", "age", "occupation"],
}
response_format = JsonSchemaResponseFormat(
type=ResponseFormatType.json_schema, json_schema=person_schema
)
structured_response = client.chat.completions.create(
model="meta-llama/Llama-3.1-8B-Instruct",
model="nvidia/meta/llama-3.1-8b-instruct",
messages=[
{
"role": "user",
"content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. ",
}
],
response_format=response_format,
extra_body={"nvext": {"guided_json": person_schema}},
)
print(f"Structured Response: {structured_response.choices[0].message.content}")
```
@ -141,7 +137,7 @@ The following example shows how to create embeddings for an NVIDIA NIM.
```python
response = client.embeddings.create(
model="nvidia/llama-3.2-nv-embedqa-1b-v2",
model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
input=["What is the capital of France?"],
extra_body={"input_type": "query"},
)
@ -163,15 +159,15 @@ image_path = {path_to_the_image}
demo_image_b64 = load_image_as_base64(image_path)
vlm_response = client.chat.completions.create(
model="nvidia/vila",
model="nvidia/meta/llama-3.2-11b-vision-instruct",
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": demo_image_b64,
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{demo_image_b64}",
},
},
{

View file

@ -10,7 +10,7 @@ from .config import NVIDIAConfig
async def get_adapter_impl(config: NVIDIAConfig, _deps) -> Inference:
# import dynamically so `llama stack build` does not fail due to missing dependencies
# import dynamically so `llama stack list-deps` does not fail due to missing dependencies
from .nvidia import NVIDIAInferenceAdapter
if not isinstance(config, NVIDIAConfig):

View file

@ -19,15 +19,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
"""
NVIDIA Inference Adapter for Llama Stack.
Note: The inheritance order is important here. OpenAIMixin must come before
ModelRegistryHelper to ensure that OpenAIMixin.check_model_availability()
is used instead of ModelRegistryHelper.check_model_availability(). It also
must come before Inference to ensure that OpenAIMixin methods are available
in the Inference interface.
- OpenAIMixin.check_model_availability() queries the NVIDIA API to check if a model exists
- ModelRegistryHelper.check_model_availability() just returns False and shows a warning
"""
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html

View file

@ -22,7 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client

View file

@ -19,7 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
Build the NVIDIA environment:
```bash
llama stack build --distro nvidia --image-type venv
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```
### Basic Usage using the LlamaStack Python Client

View file

@ -12,11 +12,6 @@ from .config import ChromaVectorIOConfig
async def get_adapter_impl(config: ChromaVectorIOConfig, deps: dict[Api, ProviderSpec]):
from .chroma import ChromaVectorIOAdapter
impl = ChromaVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = ChromaVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@ -12,24 +12,16 @@ import chromadb
from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import (
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
)
from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig
@@ -38,7 +30,7 @@ log = get_logger(name=__name__, category="vector_io::chroma")
ChromaClientType = chromadb.api.AsyncClientAPI | chromadb.api.ClientAPI
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:chroma:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:chroma:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:chroma:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:chroma:{VERSION}::"
@@ -68,19 +60,13 @@ class ChromaIndex(EmbeddingIndex):
ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
await maybe_await(
self.collection.add(
documents=[chunk.model_dump_json() for chunk in chunks],
embeddings=embeddings,
ids=ids,
)
self.collection.add(documents=[chunk.model_dump_json() for chunk in chunks], embeddings=embeddings, ids=ids)
)
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
results = await maybe_await(
self.collection.query(
query_embeddings=[embedding.tolist()],
n_results=k,
include=["documents", "distances"],
query_embeddings=[embedding.tolist()], n_results=k, include=["documents", "distances"]
)
)
distances = results["distances"][0]
@@ -108,12 +94,7 @@ class ChromaIndex(EmbeddingIndex):
async def delete(self):
await maybe_await(self.client.delete_collection(self.collection.name))
async def query_keyword(
self,
query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
raise NotImplementedError("Keyword search is not supported in Chroma")
async def delete_chunks(self, chunks_for_deletion: list[ChunkForDeletion]) -> None:
@@ -133,26 +114,24 @@ class ChromaIndex(EmbeddingIndex):
raise NotImplementedError("Hybrid search is not supported in Chroma")
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteChromaVectorIOConfig | InlineChromaVectorIOConfig,
inference_api: Api.inference,
models_apis: Api.models,
inference_api: Inference,
files_api: Files | None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
log.info(f"Initializing ChromaVectorIOAdapter with url: {config}")
self.config = config
self.inference_api = inference_api
self.models_api = models_apis
self.client = None
self.cache = {}
self.vector_db_store = None
self.vector_store_table = None
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)
self.vector_db_store = self.kvstore
self.kvstore = await kvstore_impl(self.config.persistence)
self.vector_store_table = self.kvstore
if isinstance(self.config, RemoteChromaVectorIOConfig):
log.info(f"Connecting to Chroma server at: {self.config.url}")
@@ -172,70 +151,58 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(
self,
vector_db: VectorDB,
) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
collection = await maybe_await(
self.client.get_or_create_collection(
name=vector_db.identifier,
metadata={"vector_db": vector_db.model_dump_json()},
name=vector_store.identifier, metadata={"vector_store": vector_store.model_dump_json()}
)
)
self.cache[vector_db.identifier] = VectorDBWithIndex(
vector_db, ChromaIndex(self.client, collection), self.inference_api
self.cache[vector_store.identifier] = VectorStoreWithIndex(
vector_store, ChromaIndex(self.client, collection), self.inference_api
)
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id not in self.cache:
log.warning(f"Vector DB {vector_db_id} not found")
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id not in self.cache:
log.warning(f"Vector DB {vector_store_id} not found")
return
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(
self,
vector_db_id: str,
chunks: list[Chunk],
ttl_seconds: int | None = None,
) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
await index.insert_chunks(chunks)
async def query_chunks(
self,
vector_db_id: str,
query: InterleavedContent,
params: dict[str, Any] | None = None,
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if index is None:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
return await index.query_chunks(query, params)
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_db_id))
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
collection = await maybe_await(self.client.get_collection(vector_store_id))
if not collection:
raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_db_id] = index
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
index = VectorStoreWithIndex(vector_store, ChromaIndex(self.client, collection), self.inference_api)
self.cache[vector_store_id] = index
return index
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete chunks from a Chroma vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise ValueError(f"Vector DB {store_id} not found")
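The hunk above renames the helper and its backing table from `vector_db` to `vector_store` terms, but the cache-then-registry lookup logic is unchanged: check the in-memory cache, fall back to the persisted registry, rebuild the wrapper, and cache it. A standalone sketch of that pattern, with hypothetical stand-in classes rather than llama_stack types:

```python
# Standalone sketch of the cache-then-registry lookup used by
# _get_and_cache_vector_store_index above. All classes here are hypothetical
# stand-ins, not llama_stack types.
import asyncio
from dataclasses import dataclass, field


@dataclass
class FakeIndex:
    store_id: str

    async def query(self, text: str) -> str:
        return f"results for {text!r} from {self.store_id}"


@dataclass
class FakeRegistry:
    records: dict[str, dict] = field(default_factory=dict)

    async def get_vector_store(self, store_id: str) -> dict | None:
        return self.records.get(store_id)


@dataclass
class Adapter:
    registry: FakeRegistry
    cache: dict[str, FakeIndex] = field(default_factory=dict)

    async def _get_and_cache_index(self, store_id: str) -> FakeIndex:
        if store_id in self.cache:          # fast path: already materialized
            return self.cache[store_id]
        record = await self.registry.get_vector_store(store_id)
        if not record:                      # unknown store: surface an error
            raise ValueError(f"Vector store {store_id} not found")
        index = FakeIndex(store_id)         # in the adapter, wrap the real index here
        self.cache[store_id] = index        # cache for subsequent calls
        return index


async def main() -> None:
    adapter = Adapter(FakeRegistry({"vs_123": {"identifier": "vs_123"}}))
    index = await adapter._get_and_cache_index("vs_123")
    print(await index.query("hello"))


asyncio.run(main())
```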

View file

@@ -8,21 +8,21 @@ from typing import Any
from pydantic import BaseModel, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@json_schema_type
class ChromaVectorIOConfig(BaseModel):
url: str | None
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
persistence: KVStoreReference = Field(description="Config for KV store backend")
@classmethod
def sample_run_config(cls, __distro_dir__: str, url: str = "${env.CHROMADB_URL}", **kwargs: Any) -> dict[str, Any]:
return {
"url": url,
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="chroma_remote_registry.db",
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::chroma_remote",
).model_dump(exclude_none=True),
}
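Putting the new fields together, constructing the remote Chroma config directly would look roughly like this. A sketch only: the config module path is assumed from the remote provider layout and the Chroma URL is a placeholder; the field names come from the hunk above.

```python
# Sketch of the new persistence wiring; the config module path is assumed,
# and the Chroma URL is a placeholder.
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig

config = ChromaVectorIOConfig(
    url="http://localhost:8000",
    persistence=KVStoreReference(
        backend="kv_default",
        namespace="vector_io::chroma_remote",
    ),
)

# sample_run_config() now emits the same reference instead of a SQLite kvstore block.
print(ChromaVectorIOConfig.sample_run_config(__distro_dir__="~/.llama/distributions/demo"))
```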

View file

@@ -13,12 +13,6 @@ async def get_adapter_impl(config: MilvusVectorIOConfig, deps: dict[Api, Provide
from .milvus import MilvusVectorIOAdapter
assert isinstance(config, MilvusVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = MilvusVectorIOAdapter(
config,
deps[Api.inference],
deps[Api.models],
deps.get(Api.files),
)
impl = MilvusVectorIOAdapter(config, deps[Api.inference], deps.get(Api.files))
await impl.initialize()
return impl

View file

@@ -8,7 +8,7 @@ from typing import Any
from pydantic import BaseModel, ConfigDict, Field
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.core.storage.datatypes import KVStoreReference
from llama_stack.schema_utils import json_schema_type
@@ -17,7 +17,7 @@ class MilvusVectorIOConfig(BaseModel):
uri: str = Field(description="The URI of the Milvus server")
token: str | None = Field(description="The token of the Milvus server")
consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong")
kvstore: KVStoreConfig = Field(description="Config for KV store backend")
persistence: KVStoreReference = Field(description="Config for KV store backend")
# This configuration allows additional fields to be passed through to the underlying Milvus client.
# See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general.
@@ -28,8 +28,8 @@ class MilvusVectorIOConfig(BaseModel):
return {
"uri": "${env.MILVUS_ENDPOINT}",
"token": "${env.MILVUS_TOKEN}",
"kvstore": SqliteKVStoreConfig.sample_run_config(
__distro_dir__=__distro_dir__,
db_name="milvus_remote_registry.db",
),
"persistence": KVStoreReference(
backend="kv_default",
namespace="vector_io::milvus_remote",
).model_dump(exclude_none=True),
}
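For comparison with the old SQLite kvstore block, the dict produced by the updated `sample_run_config()` should now look roughly like this (the values are the env placeholders shown in the hunk):

```python
# Approximate output of the updated MilvusVectorIOConfig.sample_run_config().
sample = {
    "uri": "${env.MILVUS_ENDPOINT}",
    "token": "${env.MILVUS_TOKEN}",
    "persistence": {
        "backend": "kv_default",
        "namespace": "vector_io::milvus_remote",
    },
}
```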

View file

@@ -14,15 +14,10 @@ from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusC
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference, InterleavedContent
from llama_stack.apis.models import Models
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@@ -31,7 +26,7 @@ from llama_stack.providers.utils.memory.vector_store import (
RERANKER_TYPE_WEIGHTED,
ChunkForDeletion,
EmbeddingIndex,
VectorDBWithIndex,
VectorStoreWithIndex,
)
from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name
@@ -40,7 +35,7 @@ from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig
logger = get_logger(name=__name__, category="vector_io::milvus")
VERSION = "v3"
VECTOR_DBS_PREFIX = f"vector_dbs:milvus:{VERSION}::"
VECTOR_DBS_PREFIX = f"vector_stores:milvus:{VERSION}::"
VECTOR_INDEX_PREFIX = f"vector_index:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_PREFIX = f"openai_vector_stores:milvus:{VERSION}::"
OPENAI_VECTOR_STORES_FILES_PREFIX = f"openai_vector_stores_files:milvus:{VERSION}::"
@@ -74,46 +69,23 @@ class MilvusIndex(EmbeddingIndex):
logger.info(f"Creating new collection {self.collection_name} with nullable sparse field")
# Create schema for vector search
schema = self.client.create_schema()
schema.add_field(
field_name="chunk_id",
datatype=DataType.VARCHAR,
is_primary=True,
max_length=100,
)
schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field(
field_name="content",
datatype=DataType.VARCHAR,
max_length=65535,
enable_analyzer=True, # Enable text analysis for BM25
)
schema.add_field(
field_name="vector",
datatype=DataType.FLOAT_VECTOR,
dim=len(embeddings[0]),
)
schema.add_field(
field_name="chunk_content",
datatype=DataType.JSON,
)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=len(embeddings[0]))
schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
# Add sparse vector field for BM25 (required by the function)
schema.add_field(
field_name="sparse",
datatype=DataType.SPARSE_FLOAT_VECTOR,
)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)
# Create indexes
index_params = self.client.prepare_index_params()
index_params.add_index(
field_name="vector",
index_type="FLAT",
metric_type="COSINE",
)
index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE")
# Add index for sparse field (required by BM25 function)
index_params.add_index(
field_name="sparse",
index_type="SPARSE_INVERTED_INDEX",
metric_type="BM25",
)
index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="BM25")
# Add BM25 function for full-text search
bm25_function = Function(
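Reassembled from the fragments above, the full-text-search collection setup reads roughly as follows. This is a sketch: the endpoint, collection name, and embedding dimension are placeholders, and the BM25 `Function` arguments follow the pymilvus full-text-search API rather than being copied verbatim from this diff.

```python
# Reassembled, standalone version of the schema/index fragments shown above.
# The Milvus endpoint, collection name, and dim are placeholders.
from pymilvus import DataType, Function, FunctionType, MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # placeholder endpoint

schema = client.create_schema()
schema.add_field(field_name="chunk_id", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field(field_name="content", datatype=DataType.VARCHAR, max_length=65535, enable_analyzer=True)
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=384)
schema.add_field(field_name="chunk_content", datatype=DataType.JSON)
schema.add_field(field_name="sparse", datatype=DataType.SPARSE_FLOAT_VECTOR)

# BM25 function: Milvus derives sparse vectors for full-text search from the
# analyzed "content" field into the "sparse" field.
schema.add_function(
    Function(
        name="bm25",
        input_field_names=["content"],
        output_field_names=["sparse"],
        function_type=FunctionType.BM25,
    )
)

index_params = client.prepare_index_params()
index_params.add_index(field_name="vector", index_type="FLAT", metric_type="COSINE")
index_params.add_index(field_name="sparse", index_type="SPARSE_INVERTED_INDEX", metric_type="BM25")

client.create_collection(collection_name="demo_chunks", schema=schema, index_params=index_params)
```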
@@ -144,11 +116,7 @@ class MilvusIndex(EmbeddingIndex):
}
)
try:
await asyncio.to_thread(
self.client.insert,
self.collection_name,
data=data,
)
await asyncio.to_thread(self.client.insert, self.collection_name, data=data)
except Exception as e:
logger.error(f"Error inserting chunks into Milvus collection {self.collection_name}: {e}")
raise e
@@ -167,12 +135,7 @@ class MilvusIndex(EmbeddingIndex):
scores = [res["distance"] for res in search_res[0]]
return QueryChunksResponse(chunks=chunks, scores=scores)
async def query_keyword(
self,
query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
"""
Perform BM25-based keyword search using Milvus's built-in full-text search.
"""
@@ -210,12 +173,7 @@ class MilvusIndex(EmbeddingIndex):
# Fallback to simple text search
return await self._fallback_keyword_search(query_string, k, score_threshold)
async def _fallback_keyword_search(
self,
query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
async def _fallback_keyword_search(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
"""
Fallback to simple text search when BM25 search is not available.
"""
@@ -303,12 +261,11 @@ class MilvusIndex(EmbeddingIndex):
raise
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtocolPrivate):
def __init__(
self,
config: RemoteMilvusVectorIOConfig | InlineMilvusVectorIOConfig,
inference_api: Inference,
models_api: Models,
files_api: Files | None,
) -> None:
super().__init__(files_api=files_api, kvstore=None)
@@ -316,29 +273,28 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
self.cache = {}
self.client = None
self.inference_api = inference_api
self.models_api = models_api
self.vector_db_store = None
self.vector_store_table = None
self.metadata_collection_name = "openai_vector_stores_metadata"
async def initialize(self) -> None:
self.kvstore = await kvstore_impl(self.config.kvstore)
self.kvstore = await kvstore_impl(self.config.persistence)
start_key = VECTOR_DBS_PREFIX
end_key = f"{VECTOR_DBS_PREFIX}\xff"
stored_vector_dbs = await self.kvstore.values_in_range(start_key, end_key)
stored_vector_stores = await self.kvstore.values_in_range(start_key, end_key)
for vector_db_data in stored_vector_dbs:
vector_db = VectorDB.model_validate_json(vector_db_data)
index = VectorDBWithIndex(
vector_db,
for vector_store_data in stored_vector_stores:
vector_store = VectorStore.model_validate_json(vector_store_data)
index = VectorStoreWithIndex(
vector_store,
index=MilvusIndex(
client=self.client,
collection_name=vector_db.identifier,
collection_name=vector_store.identifier,
consistency_level=self.config.consistency_level,
kvstore=self.kvstore,
),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
if isinstance(self.config, RemoteMilvusVectorIOConfig):
logger.info(f"Connecting to Milvus server at {self.config.uri}")
self.client = MilvusClient(**self.config.model_dump(exclude_none=True))
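The `initialize()` hunk above rehydrates every persisted vector store by range-scanning the new `vector_stores:milvus:v3::` prefix and rebuilding one index wrapper per record. A standalone sketch of that pattern, with hypothetical stand-ins for the KV store and the record model:

```python
# Standalone sketch of the rehydration pattern in initialize() above.
# InMemoryKVStore and StoreRecord are hypothetical stand-ins.
import asyncio

from pydantic import BaseModel


class StoreRecord(BaseModel):
    identifier: str
    embedding_model: str


class InMemoryKVStore:
    def __init__(self, data: dict[str, str]):
        self._data = data

    async def values_in_range(self, start_key: str, end_key: str) -> list[str]:
        return [v for k, v in sorted(self._data.items()) if start_key <= k < end_key]


PREFIX = "vector_stores:milvus:v3::"


async def rehydrate(kvstore: InMemoryKVStore) -> dict[str, StoreRecord]:
    cache: dict[str, StoreRecord] = {}
    # "\xff" sorts after any printable suffix, so the range covers every key under PREFIX.
    for raw in await kvstore.values_in_range(PREFIX, f"{PREFIX}\xff"):
        record = StoreRecord.model_validate_json(raw)
        cache[record.identifier] = record  # in the adapter this wraps a real index
    return cache


kv = InMemoryKVStore(
    {f"{PREFIX}vs_1": StoreRecord(identifier="vs_1", embedding_model="all-MiniLM-L6-v2").model_dump_json()}
)
print(asyncio.run(rehydrate(kv)))
```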
@@ -355,72 +311,61 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
# Clean up mixin resources (file batch tasks)
await super().shutdown()
async def register_vector_db(
self,
vector_db: VectorDB,
) -> None:
async def register_vector_store(self, vector_store: VectorStore) -> None:
if isinstance(self.config, RemoteMilvusVectorIOConfig):
consistency_level = self.config.consistency_level
else:
consistency_level = "Strong"
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(self.client, vector_db.identifier, consistency_level=consistency_level),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(self.client, vector_store.identifier, consistency_level=consistency_level),
inference_api=self.inference_api,
)
self.cache[vector_db.identifier] = index
self.cache[vector_store.identifier] = index
async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex | None:
if vector_db_id in self.cache:
return self.cache[vector_db_id]
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
if vector_store_id in self.cache:
return self.cache[vector_store_id]
if self.vector_db_store is None:
raise VectorStoreNotFoundError(vector_db_id)
if self.vector_store_table is None:
raise VectorStoreNotFoundError(vector_store_id)
vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
if not vector_db:
raise VectorStoreNotFoundError(vector_db_id)
vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
if not vector_store:
raise VectorStoreNotFoundError(vector_store_id)
index = VectorDBWithIndex(
vector_db=vector_db,
index=MilvusIndex(client=self.client, collection_name=vector_db.identifier, kvstore=self.kvstore),
index = VectorStoreWithIndex(
vector_store=vector_store,
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
inference_api=self.inference_api,
)
self.cache[vector_db_id] = index
self.cache[vector_store_id] = index
return index
async def unregister_vector_db(self, vector_db_id: str) -> None:
if vector_db_id in self.cache:
await self.cache[vector_db_id].index.delete()
del self.cache[vector_db_id]
async def unregister_vector_store(self, vector_store_id: str) -> None:
if vector_store_id in self.cache:
await self.cache[vector_store_id].index.delete()
del self.cache[vector_store_id]
async def insert_chunks(
self,
vector_db_id: str,
chunks: list[Chunk],
ttl_seconds: int | None = None,
) -> None:
index = await self._get_and_cache_vector_db_index(vector_db_id)
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
await index.insert_chunks(chunks)
async def query_chunks(
self,
vector_db_id: str,
query: InterleavedContent,
params: dict[str, Any] | None = None,
self, vector_db_id: str, query: InterleavedContent, params: dict[str, Any] | None = None
) -> QueryChunksResponse:
index = await self._get_and_cache_vector_db_index(vector_db_id)
index = await self._get_and_cache_vector_store_index(vector_db_id)
if not index:
raise VectorStoreNotFoundError(vector_db_id)
return await index.query_chunks(query, params)
async def delete_chunks(self, store_id: str, chunks_for_deletion: list[ChunkForDeletion]) -> None:
"""Delete a chunk from a milvus vector store."""
index = await self._get_and_cache_vector_db_index(store_id)
index = await self._get_and_cache_vector_store_index(store_id)
if not index:
raise VectorStoreNotFoundError(store_id)

Some files were not shown because too many files have changed in this diff.