group all server stores under storage.stores

Ashwin Bharambe 2025-10-19 08:06:19 -07:00
parent 636764c2a1
commit 2bba56a0a8
35 changed files with 806 additions and 503 deletions

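The net effect, in one place: the top-level `metadata_store`, `inference_store`, and `conversations_store` run-config fields move under `storage.stores`, next to the named backends they reference. A minimal sketch of the new shape using the Pydantic types this commit introduces (values are illustrative; constructor usage mirrors the test fixtures in the diffs below):

from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    SqlStoreReference,
    StorageConfig,
)

run_config = StackRunConfig(
    image_name="demo",
    apis=[],
    providers={},
    storage=StorageConfig(
        # Named backends: kv_* types back KV references, sql_* types back SQL references.
        backends={
            "kv_default": SqliteKVStoreConfig(db_path="/tmp/kvstore.db"),
            "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql_store.db"),
        },
        # Formerly the top-level metadata_store / inference_store / conversations_store fields.
        stores=ServerStoresConfig(
            metadata=KVStoreReference(backend="kv_default", namespace="registry"),
            inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
            conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
        ),
    ),
)
assert run_config.storage.stores.metadata.backend == "kv_default"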
@@ -98,21 +98,30 @@ data:
       - provider_id: model-context-protocol
         provider_type: remote::model-context-protocol
         config: {}
-    metadata_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
-      table_name: llamastack_kvstore
-    inference_store:
-      type: postgres
-      host: ${env.POSTGRES_HOST:=localhost}
-      port: ${env.POSTGRES_PORT:=5432}
-      db: ${env.POSTGRES_DB:=llamastack}
-      user: ${env.POSTGRES_USER:=llamastack}
-      password: ${env.POSTGRES_PASSWORD:=llamastack}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
     models:
     - metadata:
         embedding_dimension: 768

@@ -137,5 +146,4 @@ data:
       port: 8323
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config

@@ -95,21 +95,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768

@@ -44,18 +44,32 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
+      persistence:
+        agent_state:
+          backend: kv_default
+          namespace: agents
+        responses:
+          backend: sql_default
+          table_name: responses
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config: {}
-metadata_store:
-  namespace: null
-  type: sqlite
-  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

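The agents hunk above is the provider-side half of the migration: the meta-reference agents provider no longer carries its own SQLite settings, it references the shared backends by name. A sketch of the equivalent programmatic config, mirroring the test fixture near the end of this commit (the kv_default/sql_default backends must be declared under storage.backends and registered):

from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
from llama_stack.providers.inline.agents.meta_reference.config import (
    AgentPersistenceConfig,
    MetaReferenceAgentsImplConfig,
)

config = MetaReferenceAgentsImplConfig(
    persistence=AgentPersistenceConfig(
        # Agent state lives in a KV namespace; responses land in a SQL table.
        agent_state=KVStoreReference(backend="kv_default", namespace="agents"),
        responses=ResponsesStoreReference(backend="sql_default", table_name="responses"),
    ),
)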
@@ -1,56 +1,155 @@
 apiVersion: v1
 data:
-  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n-
-    inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n
-    \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n
-    \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens:
-    ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify:
-    ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type:
-    remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n
-    \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n
-    \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n
-    \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n
-    \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n
-    \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n
-    \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n
-    \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n
-    \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id:
-    meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir:
-    ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n
-    \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
-    \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n
-    \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n
-    \ provider_type: inline::meta-reference\n config:\n persistence_store:\n
-    \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port:
-    ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n
-    \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks:
-    ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n
-    \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n
-    \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n
-    \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results:
-    3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config:
-    {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n
-    \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n
-    \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user:
-    ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n
-    \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host:
-    ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n
-    \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n-
-    metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id:
-    sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n
-    \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id:
-    ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n
-    \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs:
-    []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id:
-    builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n
-    \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n
-    \ type: github_token\n"
+  stack_run_config.yaml: |
+    version: '2'
+    image_name: kubernetes-demo
+    apis:
+    - agents
+    - inference
+    - files
+    - safety
+    - telemetry
+    - tool_runtime
+    - vector_io
+    providers:
+      inference:
+      - provider_id: vllm-inference
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: vllm-safety
+        provider_type: remote::vllm
+        config:
+          url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}
+          max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+          api_token: ${env.VLLM_API_TOKEN:=fake}
+          tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+      - provider_id: sentence-transformers
+        provider_type: inline::sentence-transformers
+        config: {}
+      vector_io:
+      - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+        provider_type: remote::chromadb
+        config:
+          url: ${env.CHROMADB_URL:=}
+          kvstore:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      files:
+      - provider_id: meta-reference-files
+        provider_type: inline::localfs
+        config:
+          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+          metadata_store:
+            type: sqlite
+            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+      safety:
+      - provider_id: llama-guard
+        provider_type: inline::llama-guard
+        config:
+          excluded_categories: []
+      agents:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          persistence_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+          responses_store:
+            type: postgres
+            host: ${env.POSTGRES_HOST:=localhost}
+            port: ${env.POSTGRES_PORT:=5432}
+            db: ${env.POSTGRES_DB:=llamastack}
+            user: ${env.POSTGRES_USER:=llamastack}
+            password: ${env.POSTGRES_PASSWORD:=llamastack}
+      telemetry:
+      - provider_id: meta-reference
+        provider_type: inline::meta-reference
+        config:
+          service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
+          sinks: ${env.TELEMETRY_SINKS:=console}
+      tool_runtime:
+      - provider_id: brave-search
+        provider_type: remote::brave-search
+        config:
+          api_key: ${env.BRAVE_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: tavily-search
+        provider_type: remote::tavily-search
+        config:
+          api_key: ${env.TAVILY_SEARCH_API_KEY:+}
+          max_results: 3
+      - provider_id: rag-runtime
+        provider_type: inline::rag-runtime
+        config: {}
+      - provider_id: model-context-protocol
+        provider_type: remote::model-context-protocol
+        config: {}
+    storage:
+      backends:
+        kv_default:
+          type: kv_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+          table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+        sql_default:
+          type: sql_postgres
+          host: ${env.POSTGRES_HOST:=localhost}
+          port: ${env.POSTGRES_PORT:=5432}
+          db: ${env.POSTGRES_DB:=llamastack}
+          user: ${env.POSTGRES_USER:=llamastack}
+          password: ${env.POSTGRES_PASSWORD:=llamastack}
+      references:
+        metadata:
+          backend: kv_default
+          namespace: registry
+        inference:
+          backend: sql_default
+          table_name: inference_store
+    models:
+    - metadata:
+        embedding_dimension: 768
+      model_id: nomic-embed-text-v1.5
+      provider_id: sentence-transformers
+      model_type: embedding
+    - metadata: {}
+      model_id: ${env.INFERENCE_MODEL}
+      provider_id: vllm-inference
+      model_type: llm
+    - metadata: {}
+      model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+      provider_id: vllm-safety
+      model_type: llm
+    shields:
+    - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    vector_dbs: []
+    datasets: []
+    scoring_fns: []
+    benchmarks: []
+    tool_groups:
+    - toolgroup_id: builtin::websearch
+      provider_id: tavily-search
+    - toolgroup_id: builtin::rag
+      provider_id: rag-runtime
+    server:
+      port: 8321
+      auth:
+        provider_config:
+          type: github_token
 kind: ConfigMap
 metadata:
-  creationTimestamp: null
   name: llama-stack-config

@@ -93,21 +93,30 @@ providers:
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
     config: {}
-metadata_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
-  table_name: llamastack_kvstore
-inference_store:
-  type: postgres
-  host: ${env.POSTGRES_HOST:=localhost}
-  port: ${env.POSTGRES_PORT:=5432}
-  db: ${env.POSTGRES_DB:=llamastack}
-  user: ${env.POSTGRES_USER:=llamastack}
-  password: ${env.POSTGRES_PASSWORD:=llamastack}
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  references:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
 models:
 - metadata:
     embedding_dimension: 768

@@ -43,6 +43,7 @@ from llama_stack.core.stack import replace_env_vars
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
     KVStoreReference,
+    ServerStoresConfig,
     SqliteKVStoreConfig,
     SqliteSqlStoreConfig,
     SqlStoreReference,

@@ -302,7 +303,21 @@ def _generate_run_config(
             "sql_default": SqliteSqlStoreConfig(
                 db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
             ),
-        }
+        },
+        stores=ServerStoresConfig(
+            metadata=KVStoreReference(
+                backend="kv_default",
+                namespace="registry",
+            ),
+            inference=InferenceStoreReference(
+                backend="sql_default",
+                table_name="inference_store",
+            ),
+            conversations=SqlStoreReference(
+                backend="sql_default",
+                table_name="openai_conversations",
+            ),
+        ),
     )

     run_config = StackRunConfig(

@@ -311,18 +326,6 @@ def _generate_run_config(
         apis=apis,
         providers={},
         storage=storage,
-        metadata_store=KVStoreReference(
-            backend="kv_default",
-            namespace="registry",
-        ),
-        inference_store=InferenceStoreReference(
-            backend="sql_default",
-            table_name="inference_store",
-        ),
-        conversations_store=SqlStoreReference(
-            backend="sql_default",
-            table_name="openai_conversations",
-        ),
         external_providers_dir=build_config.external_providers_dir
         if build_config.external_providers_dir
         else EXTERNAL_PROVIDERS_DIR,

@@ -159,6 +159,37 @@ def upgrade_from_routing_table(
     config_dict["apis"] = config_dict["apis_to_serve"]
     config_dict.pop("apis_to_serve", None)

+    # Add default storage config if not present
+    if "storage" not in config_dict:
+        config_dict["storage"] = {
+            "backends": {
+                "kv_default": {
+                    "type": "kv_sqlite",
+                    "db_path": "~/.llama/kvstore.db",
+                },
+                "sql_default": {
+                    "type": "sql_sqlite",
+                    "db_path": "~/.llama/sql_store.db",
+                },
+            },
+            "stores": {
+                "metadata": {
+                    "namespace": "registry",
+                    "backend": "kv_default",
+                },
+                "inference": {
+                    "table_name": "inference_store",
+                    "backend": "sql_default",
+                    "max_write_queue_size": 10000,
+                    "num_writers": 4,
+                },
+                "conversations": {
+                    "table_name": "openai_conversations",
+                    "backend": "sql_default",
+                },
+            },
+        }
+
     return config_dict

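For configs that predate the storage section, the upgrade path above injects SQLite defaults. Those defaults parse directly into the new model; a short sketch (assuming the backend union discriminates on the `type` tag, as the Literal fields in the datatypes diff below suggest):

from llama_stack.core.storage.datatypes import StorageConfig

# Verbatim defaults injected by upgrade_from_routing_table above.
legacy_defaults = {
    "backends": {
        "kv_default": {"type": "kv_sqlite", "db_path": "~/.llama/kvstore.db"},
        "sql_default": {"type": "sql_sqlite", "db_path": "~/.llama/sql_store.db"},
    },
    "stores": {
        "metadata": {"namespace": "registry", "backend": "kv_default"},
        "inference": {
            "table_name": "inference_store",
            "backend": "sql_default",
            "max_write_queue_size": 10000,
            "num_writers": 4,
        },
        "conversations": {"table_name": "openai_conversations", "backend": "sql_default"},
    },
}

cfg = StorageConfig.model_validate(legacy_defaults)
assert cfg.stores.inference.num_writers == 4
assert cfg.stores.responses is None  # responses is optional and stays unset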
@@ -56,9 +56,9 @@ class ConversationServiceImpl(Conversations):
         self.policy = config.policy

         # Use conversations store reference from run config
-        conversations_ref = config.run_config.conversations_store
+        conversations_ref = config.run_config.storage.stores.conversations
         if not conversations_ref:
-            raise ValueError("conversations_store must be configured in run config")
+            raise ValueError("storage.stores.conversations must be configured in run config")
         base_sql_store = sqlstore_impl(conversations_ref)
         self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)

@@ -27,9 +27,7 @@ from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
     KVStoreReference,
-    SqlStoreReference,
     StorageBackendType,
     StorageConfig,
 )

@@ -470,19 +468,7 @@ can be instantiated multiple times (with different configs) if necessary.
 """,
     )
     storage: StorageConfig = Field(
-        description="Catalog of named storage backends available to the stack",
-    )
-    metadata_store: KVStoreReference | None = Field(
-        default=None,
-        description="Reference to the KV store backend used by the distribution registry (kv_* backend).",
-    )
-    inference_store: InferenceStoreReference | None = Field(
-        default=None,
-        description="Reference to the SQL store backend used by the inference API (sql_* backend).",
-    )
-    conversations_store: SqlStoreReference | None = Field(
-        default=None,
-        description="Reference to the SQL store backend used by the conversations API (sql_* backend).",
+        description="Catalog of named storage backends and references available to the stack",
     )

     # registry of "resources" in the distribution

@@ -523,8 +509,9 @@ can be instantiated multiple times (with different configs) if necessary.
         return v

     @model_validator(mode="after")
-    def validate_storage_references(self) -> "StackRunConfig":
-        backend_map = self.storage.backends if self.storage else {}
+    def validate_server_stores(self) -> "StackRunConfig":
+        backend_map = self.storage.backends
+        stores = self.storage.stores
         kv_backends = {
             name
             for name, cfg in backend_map.items()

@@ -558,9 +545,10 @@ can be instantiated multiple times (with different configs) if necessary.
                 f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
             )

-        _ensure_backend(self.metadata_store, kv_backends, "metadata_store")
-        _ensure_backend(self.inference_store, sql_backends, "inference_store")
-        _ensure_backend(self.conversations_store, sql_backends, "conversations_store")
+        _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata")
+        _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
+        _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
+        _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
         return self

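The renamed validator checks every configured reference against the backend family its store requires: kv_* for metadata, sql_* for inference, conversations, and the newly covered responses store. A sketch of the failure mode, mirroring the validation tests later in this commit:

import pytest
from pydantic import ValidationError

from llama_stack.core.datatypes import StackRunConfig
from llama_stack.core.storage.datatypes import (
    KVStoreReference,
    ServerStoresConfig,
    SqliteKVStoreConfig,
    SqliteSqlStoreConfig,
    StorageConfig,
)

# metadata must reference a kv_* backend; pointing it at sql_default is rejected.
with pytest.raises(ValidationError, match="kv_.* is required"):
    StackRunConfig(
        image_name="test",
        apis=[],
        providers={},
        storage=StorageConfig(
            backends={
                "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"),
                "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"),
            },
            stores=ServerStoresConfig(
                metadata=KVStoreReference(backend="sql_default", namespace="registry"),
            ),
        ),
    )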
@@ -41,9 +41,9 @@ class PromptServiceImpl(Prompts):

     async def initialize(self) -> None:
         # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.metadata_store
+        metadata_ref = self.config.run_config.storage.stores.metadata
         if not metadata_ref:
-            raise ValueError("metadata_store must be configured in run config")
+            raise ValueError("storage.stores.metadata must be configured in run config")
         prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
         self.kvstore = await kvstore_impl(prompts_ref)

@@ -80,9 +80,9 @@ async def get_auto_router_impl(
     # TODO: move pass configs to routers instead
     if api == Api.inference:
-        inference_ref = run_config.inference_store
+        inference_ref = run_config.storage.stores.inference
         if not inference_ref:
-            raise ValueError("inference_store must be configured in run config")
+            raise ValueError("storage.stores.inference must be configured in run config")
         inference_store = InferenceStore(
             reference=inference_ref,

@@ -368,9 +368,10 @@ class Stack:
             logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")

         _initialize_storage(self.run_config)
-        if not self.run_config.metadata_store:
-            raise ValueError("metadata_store must be configured with a kv_* backend")
-        dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
+        stores = self.run_config.storage.stores
+        if not stores.metadata:
+            raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
+        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)

         policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
         internal_impls = {}

@@ -72,7 +72,7 @@ class SqliteKVStoreConfig(CommonConfig):
 class PostgresKVStoreConfig(CommonConfig):
     type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES
     host: str = "localhost"
-    port: int = 5432
+    port: int | str = 5432
     db: str = "llamastack"
     user: str
     password: str | None = None

@@ -175,7 +175,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
 class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES
     host: str = "localhost"
-    port: int = 5432
+    port: int | str = 5432
     db: str = "llamastack"
     user: str
     password: str | None = None

@@ -254,7 +254,30 @@ class ResponsesStoreReference(InferenceStoreReference):
     """Responses store configuration with queue tuning."""


+class ServerStoresConfig(BaseModel):
+    metadata: KVStoreReference | None = Field(
+        default=None,
+        description="Metadata store configuration (uses KV backend)",
+    )
+    inference: InferenceStoreReference | None = Field(
+        default=None,
+        description="Inference store configuration (uses SQL backend)",
+    )
+    conversations: SqlStoreReference | None = Field(
+        default=None,
+        description="Conversations store configuration (uses SQL backend)",
+    )
+    responses: ResponsesStoreReference | None = Field(
+        default=None,
+        description="Responses store configuration (uses SQL backend)",
+    )
+
+
 class StorageConfig(BaseModel):
     backends: dict[str, StorageBackendConfig] = Field(
         description="Named backend configurations (e.g., 'default', 'cache')",
     )
+    stores: ServerStoresConfig = Field(
+        default_factory=lambda: ServerStoresConfig(),
+        description="Named references to storage backends used by the stack core",
+    )

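One detail worth noting: `stores` has a default factory, so a config that only declares backends still parses; it is the runtime checks elsewhere in this commit (stack.py, the routers, prompts, conversations) that insist the individual references are set. A quick sketch:

from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, StorageConfig

cfg = StorageConfig(backends={"kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db")})
# stores defaulted to an empty ServerStoresConfig: every reference is None.
assert cfg.stores.metadata is None and cfg.stores.responses is None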
@@ -220,17 +220,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields:
 - shield_id: llama-guard

@@ -97,17 +97,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -93,17 +93,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -110,17 +110,18 @@ storage:
     sql_default:
       type: sql_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -100,17 +100,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -99,17 +99,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -88,17 +88,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields: []
 vector_dbs: []

@@ -130,17 +130,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: gpt-4o

@@ -75,17 +75,18 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}

@@ -223,17 +223,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields:
 - shield_id: llama-guard

@@ -220,17 +220,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields:
 - shield_id: llama-guard

@@ -188,6 +188,7 @@ class RunConfigSettings(BaseModel):
     default_benchmarks: list[BenchmarkInput] | None = None
     telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
     storage_backends: dict[str, Any] | None = None
+    storage_stores: dict[str, Any] | None = None

     def run_config(
         self,

@@ -241,19 +242,25 @@ class RunConfigSettings(BaseModel):
             ),
         }

-        storage_config = dict(backends=storage_backends)
-        metadata_store = KVStoreReference(
-            backend="kv_default",
-            namespace="registry",
-        ).model_dump(exclude_none=True)
-        inference_store = InferenceStoreReference(
-            backend="sql_default",
-            table_name="inference_store",
-        ).model_dump(exclude_none=True)
-        conversations_store = SqlStoreReference(
-            backend="sql_default",
-            table_name="openai_conversations",
-        ).model_dump(exclude_none=True)
+        storage_stores = self.storage_stores or {
+            "metadata": KVStoreReference(
+                backend="kv_default",
+                namespace="registry",
+            ).model_dump(exclude_none=True),
+            "inference": InferenceStoreReference(
+                backend="sql_default",
+                table_name="inference_store",
+            ).model_dump(exclude_none=True),
+            "conversations": SqlStoreReference(
+                backend="sql_default",
+                table_name="openai_conversations",
+            ).model_dump(exclude_none=True),
+        }
+
+        storage_config = dict(
+            backends=storage_backends,
+            stores=storage_stores,
+        )

         # Return a dict that matches StackRunConfig structure
         return {

@@ -263,9 +270,6 @@ class RunConfigSettings(BaseModel):
             "apis": apis,
             "providers": provider_configs,
             "storage": storage_config,
-            "metadata_store": metadata_store,
-            "inference_store": inference_store,
-            "conversations_store": conversations_store,
             "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
             "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
             "vector_dbs": [],

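Distribution templates get a matching escape hatch: the new `storage_stores` field replaces the generated store references wholesale. Note the `or` in the diff, so an override must supply every store it needs, not only the one it changes. A hypothetical override that routes conversations to a different table (the table name here is illustrative):

from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    SqlStoreReference,
)

storage_stores = {
    "metadata": KVStoreReference(backend="kv_default", namespace="registry").model_dump(exclude_none=True),
    "inference": InferenceStoreReference(backend="sql_default", table_name="inference_store").model_dump(exclude_none=True),
    # Hypothetical: a dedicated conversations table for this distro.
    "conversations": SqlStoreReference(backend="sql_default", table_name="conversations_v2").model_dump(exclude_none=True),
}
# Passed as RunConfigSettings(..., storage_stores=storage_stores) per the field added above.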
@@ -103,17 +103,18 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
-metadata_store:
-  namespace: registry
-  backend: kv_default
-inference_store:
-  table_name: inference_store
-  backend: sql_default
-  max_write_queue_size: 10000
-  num_writers: 4
-conversations_store:
-  table_name: openai_conversations
-  backend: sql_default
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
 models: []
 shields: []
 vector_dbs: []

@@ -29,19 +29,20 @@ def test_starter_distribution_config_loads_and_resolves():
     assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig)
     assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig)

-    assert config.metadata_store is not None
-    assert config.metadata_store.backend == "kv_default"
-    assert config.metadata_store.namespace == "registry"
+    stores = config.storage.stores
+    assert stores.metadata is not None
+    assert stores.metadata.backend == "kv_default"
+    assert stores.metadata.namespace == "registry"

-    assert config.inference_store is not None
-    assert config.inference_store.backend == "sql_default"
-    assert config.inference_store.table_name == "inference_store"
-    assert config.inference_store.max_write_queue_size > 0
-    assert config.inference_store.num_writers > 0
+    assert stores.inference is not None
+    assert stores.inference.backend == "sql_default"
+    assert stores.inference.table_name == "inference_store"
+    assert stores.inference.max_write_queue_size > 0
+    assert stores.inference.num_writers > 0

-    assert config.conversations_store is not None
-    assert config.conversations_store.backend == "sql_default"
-    assert config.conversations_store.table_name == "openai_conversations"
+    assert stores.conversations is not None
+    assert stores.conversations.backend == "sql_default"
+    assert stores.conversations.table_name == "openai_conversations"


 def test_postgres_demo_distribution_config_loads():

@@ -62,6 +63,9 @@ def test_postgres_demo_distribution_config_loads():
     kv_backend = config.storage.backends["kv_default"]
     assert isinstance(kv_backend, PostgresKVStoreConfig)

+    stores = config.storage.stores
     # Stores target the Postgres backends explicitly
-    assert config.metadata_store.backend == "kv_default"
-    assert config.inference_store.backend == "sql_default"
+    assert stores.metadata is not None
+    assert stores.metadata.backend == "kv_default"
+    assert stores.inference is not None
+    assert stores.inference.backend == "sql_default"

@@ -23,6 +23,27 @@ def config_with_image_name_int():
         image_name: 1234
         apis_to_serve: []
         built_at: {datetime.now().isoformat()}
+        storage:
+          backends:
+            kv_default:
+              type: kv_sqlite
+              db_path: /tmp/test_kv.db
+            sql_default:
+              type: sql_sqlite
+              db_path: /tmp/test_sql.db
+          stores:
+            metadata:
+              backend: kv_default
+              namespace: metadata
+            inference:
+              backend: sql_default
+              table_name: inference
+            conversations:
+              backend: sql_default
+              table_name: conversations
+            responses:
+              backend: sql_default
+              table_name: responses
         providers:
           inference:
           - provider_id: provider1

@@ -54,6 +75,27 @@ def up_to_date_config():
         image_name: foo
         apis_to_serve: []
         built_at: {datetime.now().isoformat()}
+        storage:
+          backends:
+            kv_default:
+              type: kv_sqlite
+              db_path: /tmp/test_kv.db
+            sql_default:
+              type: sql_sqlite
+              db_path: /tmp/test_sql.db
+          stores:
+            metadata:
+              backend: kv_default
+              namespace: metadata
+            inference:
+              backend: sql_default
+              table_name: inference
+            conversations:
+              backend: sql_default
+              table_name: conversations
+            responses:
+              backend: sql_default
+              table_name: responses
         providers:
           inference:
           - provider_id: provider1

@@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import (
     ConversationServiceConfig,
     ConversationServiceImpl,
 )
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.storage.datatypes import (
+    ServerStoresConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageConfig,
+)
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends


 @pytest.fixture

@@ -28,7 +35,18 @@ async def service():
     with tempfile.TemporaryDirectory() as tmpdir:
         db_path = Path(tmpdir) / "test_conversations.db"

-        config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[])
+        storage = StorageConfig(
+            backends={
+                "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)),
+            },
+            stores=ServerStoresConfig(
+                conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"),
+            ),
+        )
+        register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
+        run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
+        config = ConversationServiceConfig(run_config=run_config, policy=[])
         service = ConversationServiceImpl(config, {})
         await service.initialize()
         yield service

@@ -121,9 +139,18 @@ async def test_policy_configuration():
         AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*"))
     ]

-    config = ConversationServiceConfig(
-        conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy
+    storage = StorageConfig(
+        backends={
+            "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)),
+        },
+        stores=ServerStoresConfig(
+            conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"),
+        ),
     )
+    register_sqlstore_backends({"sql_test": storage.backends["sql_test"]})
+    run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage)
+    config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy)
     service = ConversationServiceImpl(config, {})
     await service.initialize()

@@ -16,6 +16,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
     KVStoreReference,
+    ServerStoresConfig,
     SqliteKVStoreConfig,
     SqliteSqlStoreConfig,
     SqlStoreReference,

@@ -24,13 +25,30 @@ from llama_stack.core.storage.datatypes import (


 def _base_run_config(**overrides):
+    metadata_reference = overrides.pop(
+        "metadata_reference",
+        KVStoreReference(backend="kv_default", namespace="registry"),
+    )
+    inference_reference = overrides.pop(
+        "inference_reference",
+        InferenceStoreReference(backend="sql_default", table_name="inference"),
+    )
+    conversations_reference = overrides.pop(
+        "conversations_reference",
+        SqlStoreReference(backend="sql_default", table_name="conversations"),
+    )
     storage = overrides.pop(
         "storage",
         StorageConfig(
             backends={
                 "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"),
                 "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"),
-            }
+            },
+            stores=ServerStoresConfig(
+                metadata=metadata_reference,
+                inference=inference_reference,
+                conversations=conversations_reference,
+            ),
         ),
     )
     return StackRunConfig(

@@ -39,39 +57,28 @@ def _base_run_config(**overrides):
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=overrides.pop(
-            "metadata_store",
-            KVStoreReference(backend="kv_default", namespace="registry"),
-        ),
-        inference_store=overrides.pop(
-            "inference_store",
-            InferenceStoreReference(backend="sql_default", table_name="inference"),
-        ),
-        conversations_store=overrides.pop(
-            "conversations_store",
-            SqlStoreReference(backend="sql_default", table_name="conversations"),
-        ),
         **overrides,
     )


 def test_references_require_known_backend():
     with pytest.raises(ValidationError, match="unknown backend 'missing'"):
-        _base_run_config(metadata_store=KVStoreReference(backend="missing", namespace="registry"))
+        _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry"))


 def test_references_must_match_backend_family():
     with pytest.raises(ValidationError, match="kv_.* is required"):
-        _base_run_config(metadata_store=KVStoreReference(backend="sql_default", namespace="registry"))
+        _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry"))

     with pytest.raises(ValidationError, match="sql_.* is required"):
         _base_run_config(
-            inference_store=InferenceStoreReference(backend="kv_default", table_name="inference"),
+            inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"),
         )


 def test_valid_configuration_passes_validation():
     config = _base_run_config()
-    assert config.metadata_store.backend == "kv_default"
-    assert config.inference_store.backend == "sql_default"
-    assert config.conversations_store.backend == "sql_default"
+    stores = config.storage.stores
+    assert stores.metadata is not None and stores.metadata.backend == "kv_default"
+    assert stores.inference is not None and stores.inference.backend == "sql_default"
+    assert stores.conversations is not None and stores.conversations.backend == "sql_default"

@@ -16,6 +16,7 @@ from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry,
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
     KVStoreReference,
+    ServerStoresConfig,
     SqliteKVStoreConfig,
     SqliteSqlStoreConfig,
     SqlStoreReference,

@@ -42,35 +43,25 @@ def _default_storage() -> StorageConfig:
         backends={
             "kv_default": SqliteKVStoreConfig(db_path=":memory:"),
             "sql_default": SqliteSqlStoreConfig(db_path=":memory:"),
-        }
+        },
+        stores=ServerStoresConfig(
+            metadata=KVStoreReference(backend="kv_default", namespace="registry"),
+            inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
+            conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
+        ),
     )


 def make_stack_config(**overrides) -> StackRunConfig:
     storage = overrides.pop("storage", _default_storage())
-    metadata_store = overrides.pop(
-        "metadata_store",
-        KVStoreReference(backend="kv_default", namespace="registry"),
-    )
-    inference_store = overrides.pop(
-        "inference_store",
-        InferenceStoreReference(backend="sql_default", table_name="inference_store"),
-    )
-    conversations_store = overrides.pop(
-        "conversations_store",
-        SqlStoreReference(backend="sql_default", table_name="conversations"),
-    )
     defaults = dict(
         image_name="test_image",
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=metadata_store,
-        inference_store=inference_store,
-        conversations_store=conversations_store,
     )
     defaults.update(overrides)
-    return make_stack_config(**defaults)
+    return StackRunConfig(**defaults)


 @pytest.fixture

@@ -12,6 +12,7 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
     KVStoreReference,
+    ServerStoresConfig,
     SqliteKVStoreConfig,
     SqliteSqlStoreConfig,
     SqlStoreReference,

@@ -32,16 +33,18 @@ async def temp_prompt_store(tmp_path_factory):
         backends={
             "kv_test": SqliteKVStoreConfig(db_path=db_path),
             "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")),
-        }
+        },
+        stores=ServerStoresConfig(
+            metadata=KVStoreReference(backend="kv_test", namespace="registry"),
+            inference=InferenceStoreReference(backend="sql_test", table_name="inference"),
+            conversations=SqlStoreReference(backend="sql_test", table_name="conversations"),
+        ),
     )
     mock_run_config = StackRunConfig(
         image_name="test-distribution",
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=KVStoreReference(backend="kv_test", namespace="registry"),
-        inference_store=InferenceStoreReference(backend="sql_test", table_name="inference"),
-        conversations_store=SqlStoreReference(backend="sql_test", table_name="conversations"),
     )
     config = PromptServiceConfig(run_config=mock_run_config)
     store = PromptServiceImpl(config, deps={})

@@ -26,6 +26,24 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaReferenceAgentsImplConfig
 from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo


+@pytest.fixture(autouse=True)
+def setup_backends(tmp_path):
+    """Register KV and SQL store backends for testing."""
+    from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqliteKVStoreConfig
+    from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends
+    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+
+    kv_path = str(tmp_path / "test_kv.db")
+    sql_path = str(tmp_path / "test_sql.db")
+    register_kvstore_backends({
+        "kv_default": SqliteKVStoreConfig(db_path=kv_path)
+    })
+    register_sqlstore_backends({
+        "sql_default": SqliteSqlStoreConfig(db_path=sql_path)
+    })
+
+
 @pytest.fixture
 def mock_apis():
     return {

@@ -40,15 +58,20 @@ def mock_apis():

 @pytest.fixture
 def config(tmp_path):
+    from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference
+    from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig
+
     return MetaReferenceAgentsImplConfig(
-        persistence_store={
-            "type": "sqlite",
-            "db_path": str(tmp_path / "test.db"),
-        },
-        responses_store={
-            "type": "sqlite",
-            "db_path": str(tmp_path / "test.db"),
-        },
+        persistence=AgentPersistenceConfig(
+            agent_state=KVStoreReference(
+                backend="kv_default",
+                namespace="agents",
+            ),
+            responses=ResponsesStoreReference(
+                backend="sql_default",
+                table_name="responses",
+            ),
+        ),
     )

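A pattern these fixtures make explicit: a reference carries only a backend name plus a namespace or table, so code that resolves references outside a running stack must register the named backends first. A condensed sketch (the import path for kvstore_impl is an assumption, placed beside register_kvstore_backends as the prompts service usage suggests):

from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends


async def open_registry_kvstore(db_path: str):
    # Backends are registered once by name ...
    register_kvstore_backends({"kv_default": SqliteKVStoreConfig(db_path=db_path)})
    # ... then references resolve against the registered backends.
    return await kvstore_impl(KVStoreReference(backend="kv_default", namespace="registry"))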
@@ -19,12 +19,15 @@ from llama_stack.core.routing_tables.models import ModelsRoutingTable
 from llama_stack.core.storage.datatypes import (
     InferenceStoreReference,
     KVStoreReference,
+    ServerStoresConfig,
     SqliteKVStoreConfig,
     SqliteSqlStoreConfig,
     SqlStoreReference,
     StorageConfig,
 )
 from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec
+from llama_stack.providers.utils.kvstore import register_kvstore_backends
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends


 def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None:

@@ -76,26 +79,23 @@ def make_run_config(**overrides) -> StackRunConfig:
             backends={
                 "kv_default": SqliteKVStoreConfig(db_path=":memory:"),
                 "sql_default": SqliteSqlStoreConfig(db_path=":memory:"),
-            }
+            },
+            stores=ServerStoresConfig(
+                metadata=KVStoreReference(backend="kv_default", namespace="registry"),
+                inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
+                conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
+            ),
         ),
     )
+    register_kvstore_backends({name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")})
+    register_sqlstore_backends(
+        {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")}
+    )
     defaults = dict(
         image_name="test_image",
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=overrides.pop(
-            "metadata_store",
-            KVStoreReference(backend="kv_default", namespace="registry"),
-        ),
-        inference_store=overrides.pop(
-            "inference_store",
-            InferenceStoreReference(backend="sql_default", table_name="inference_store"),
-        ),
-        conversations_store=overrides.pop(
-            "conversations_store",
-            SqlStoreReference(backend="sql_default", table_name="conversations"),
-        ),
     )
     defaults.update(overrides)
     return StackRunConfig(**defaults)

View file

@ -16,8 +16,18 @@ from llama_stack.apis.inference import (
OpenAIUserMessageParam, OpenAIUserMessageParam,
Order, Order,
) )
from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
from llama_stack.providers.utils.inference.inference_store import InferenceStore from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
@pytest.fixture(autouse=True)
def setup_backends(tmp_path):
"""Register SQL store backends for testing."""
db_path = str(tmp_path / "test.db")
register_sqlstore_backends({
"sql_default": SqliteSqlStoreConfig(db_path=db_path)
})
def create_test_chat_completion( def create_test_chat_completion(
@@ -44,167 +54,162 @@ def create_test_chat_completion(


 async def test_inference_store_pagination_basic():
     """Test basic pagination functionality."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()

     # Create test data with different timestamps
     base_time = int(time.time())
     test_data = [
         ("zebra-task", base_time + 1),
         ("apple-job", base_time + 2),
         ("moon-work", base_time + 3),
         ("banana-run", base_time + 4),
         ("car-exec", base_time + 5),
     ]

     # Store test chat completions
     for completion_id, timestamp in test_data:
         completion = create_test_chat_completion(completion_id, timestamp)
         input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
         await store.store_chat_completion(completion, input_messages)

     # Wait for all queued writes to complete
     await store.flush()

     # Test 1: First page with limit=2, descending order (default)
     result = await store.list_chat_completions(limit=2, order=Order.desc)
     assert len(result.data) == 2
     assert result.data[0].id == "car-exec"  # Most recent first
     assert result.data[1].id == "banana-run"
     assert result.has_more is True
     assert result.last_id == "banana-run"

     # Test 2: Second page using 'after' parameter
     result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
     assert len(result2.data) == 2
     assert result2.data[0].id == "moon-work"
     assert result2.data[1].id == "apple-job"
     assert result2.has_more is True

     # Test 3: Final page
     result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
     assert len(result3.data) == 1
     assert result3.data[0].id == "zebra-task"
     assert result3.has_more is False
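
The three pages walked through above generalize to the usual cursor loop. A hedged sketch built only from the API surface these tests exercise (assuming after defaults to None when omitted):

    # Drain all completions, newest first, two per page.
    after = None
    items = []
    while True:
        page = await store.list_chat_completions(after=after, limit=2, order=Order.desc)
        items.extend(page.data)
        if not page.has_more:
            break
        after = page.last_id  # cursor for the next request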

 async def test_inference_store_pagination_ascending():
     """Test pagination with ascending order."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()

     # Create test data
     base_time = int(time.time())
     test_data = [
         ("delta-item", base_time + 1),
         ("charlie-task", base_time + 2),
         ("alpha-work", base_time + 3),
     ]

     # Store test chat completions
     for completion_id, timestamp in test_data:
         completion = create_test_chat_completion(completion_id, timestamp)
         input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
         await store.store_chat_completion(completion, input_messages)

     # Wait for all queued writes to complete
     await store.flush()

     # Test ascending order pagination
     result = await store.list_chat_completions(limit=1, order=Order.asc)
     assert len(result.data) == 1
     assert result.data[0].id == "delta-item"  # Oldest first
     assert result.has_more is True

     # Second page with ascending order
     result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
     assert len(result2.data) == 1
     assert result2.data[0].id == "charlie-task"
     assert result2.has_more is True

 async def test_inference_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()

     # Create test data with different models
     base_time = int(time.time())
     test_data = [
         ("xyz-task", base_time + 1, "model-a"),
         ("def-work", base_time + 2, "model-b"),
         ("pqr-job", base_time + 3, "model-a"),
         ("abc-run", base_time + 4, "model-b"),
     ]

     # Store test chat completions
     for completion_id, timestamp, model in test_data:
         completion = create_test_chat_completion(completion_id, timestamp, model)
         input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
         await store.store_chat_completion(completion, input_messages)

     # Wait for all queued writes to complete
     await store.flush()

     # Test pagination with model filter
     result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
     assert len(result.data) == 1
     assert result.data[0].id == "pqr-job"  # Most recent model-a
     assert result.data[0].model == "model-a"
     assert result.has_more is True

     # Second page with model filter
     result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
     assert len(result2.data) == 1
     assert result2.data[0].id == "xyz-task"
     assert result2.data[0].model == "model-a"
     assert result2.has_more is False

 async def test_inference_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()

     # Try to paginate with non-existent ID
     with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
         await store.list_chat_completions(after="non-existent", limit=2)

 async def test_inference_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()

     # Create test data
     base_time = int(time.time())
     test_data = [
         ("omega-first", base_time + 1),
         ("beta-second", base_time + 2),
     ]

     # Store test chat completions
     for completion_id, timestamp in test_data:
         completion = create_test_chat_completion(completion_id, timestamp)
         input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
         await store.store_chat_completion(completion, input_messages)

     # Wait for all queued writes to complete
     await store.flush()

     # Test without limit
     result = await store.list_chat_completions(order=Order.desc)
     assert len(result.data) == 2
     assert result.data[0].id == "beta-second"  # Most recent first
     assert result.data[1].id == "omega-first"
     assert result.has_more is False
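
One pattern worth noting across all five tests: every write phase ends with await store.flush() before any read, because store_chat_completion() queues its write rather than committing inline. A minimal read-your-writes sketch using the same calls:

    await store.store_chat_completion(completion, input_messages)
    await store.flush()  # wait for queued writes to land before asserting
    result = await store.list_chat_completions(order=Order.desc)
    assert result.data[0].id == completion.id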