From 2bba56a0a842092dbcc253ab1483d741729d0da6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 19 Oct 2025 08:06:19 -0700 Subject: [PATCH] group all server stores under storage.stores --- .../k8s-benchmark/stack-configmap.yaml | 40 +-- .../k8s-benchmark/stack_run_config.yaml | 39 ++- docs/docs/distributions/configuration.mdx | 30 +- .../distributions/k8s/stack-configmap.yaml | 201 ++++++++++---- .../distributions/k8s/stack_run_config.yaml | 39 ++- llama_stack/cli/stack/_build.py | 29 +- llama_stack/core/configure.py | 31 +++ .../core/conversations/conversations.py | 4 +- llama_stack/core/datatypes.py | 28 +- llama_stack/core/prompts/prompts.py | 4 +- llama_stack/core/routers/__init__.py | 4 +- llama_stack/core/stack.py | 7 +- llama_stack/core/storage/datatypes.py | 27 +- llama_stack/distributions/ci-tests/run.yaml | 23 +- .../distributions/dell/run-with-safety.yaml | 23 +- llama_stack/distributions/dell/run.yaml | 23 +- .../meta-reference-gpu/run-with-safety.yaml | 23 +- .../distributions/meta-reference-gpu/run.yaml | 23 +- .../distributions/nvidia/run-with-safety.yaml | 23 +- llama_stack/distributions/nvidia/run.yaml | 23 +- .../distributions/open-benchmark/run.yaml | 23 +- .../distributions/postgres-demo/run.yaml | 23 +- .../distributions/starter-gpu/run.yaml | 23 +- llama_stack/distributions/starter/run.yaml | 23 +- llama_stack/distributions/template.py | 36 +-- llama_stack/distributions/watsonx/run.yaml | 23 +- .../test_persistence_integration.py | 30 +- tests/unit/cli/test_stack_config.py | 42 +++ .../unit/conversations/test_conversations.py | 35 ++- tests/unit/core/test_storage_references.py | 45 +-- tests/unit/distribution/test_distribution.py | 25 +- tests/unit/prompts/prompts/conftest.py | 11 +- .../agent/test_meta_reference_agent.py | 39 ++- tests/unit/server/test_resolver.py | 26 +- .../utils/inference/test_inference_store.py | 261 +++++++++--------- 35 files changed, 806 insertions(+), 503 deletions(-) diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index bb8a48d65..e1ca170f5 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -98,21 +98,30 @@ data: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata:
embedding_dimension: 768 @@ -137,5 +146,4 @@ data: port: 8323 kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index e2fbfd7a4..2ccaa21aa 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -95,21 +95,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index 81243c97b..bf3156865 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -44,18 +44,32 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + persistence: + agent_state: + backend: kv_default + namespace: agents + responses: + backend: sql_default + table_name: responses telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index 3dbb0da97..c71ab05d8 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -1,56 +1,155 @@ apiVersion: v1 data: -  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- - inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n - \ inference:\n - provider_id: vllm-inference\n
provider_type: remote::vllm\n - \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: - ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: - ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: - remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n - \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n - \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n - \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n - \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n - \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n - \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n - \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n - \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: - meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: - ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n - \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db - \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n - \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n - \ provider_type: inline::meta-reference\n config:\n persistence_store:\n - \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: - ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n - \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: - ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n - \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n - \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n - \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: - 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: - {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n - \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: - ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- - metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: - sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n - \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: - ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n - \ 
model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: - []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: - builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n - \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n - \ type: github_token\n" + stack_run_config.yaml: | + version: '2' + image_name: kubernetes-demo + apis: + - agents + - inference + - files + - safety + - telemetry + - tool_runtime + - vector_io + providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: vllm-safety + provider_type: remote::vllm + config: + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + kvstore: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: 
${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store + models: + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + provider_id: vllm-safety + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime + server: + port: 8321 + auth: + provider_config: + type: github_token kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index ee28a1ea8..863565fdf 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -93,21 +93,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index cf5ed55ae..9806e4c48 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -43,6 +43,7 @@ from llama_stack.core.stack import replace_env_vars from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -302,7 +303,21 @@ def _generate_run_config( "sql_default": SqliteSqlStoreConfig( db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", ), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + 
table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + ), ) run_config = StackRunConfig( @@ -311,18 +326,6 @@ def _generate_run_config( apis=apis, providers={}, storage=storage, - metadata_store=KVStoreReference( - backend="kv_default", - namespace="registry", - ), - inference_store=InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ), - conversations_store=SqlStoreReference( - backend="sql_default", - table_name="openai_conversations", - ), external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index bfa2c6d71..734839ea9 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -159,6 +159,37 @@ def upgrade_from_routing_table( config_dict["apis"] = config_dict["apis_to_serve"] config_dict.pop("apis_to_serve", None) + # Add default storage config if not present + if "storage" not in config_dict: + config_dict["storage"] = { + "backends": { + "kv_default": { + "type": "kv_sqlite", + "db_path": "~/.llama/kvstore.db", + }, + "sql_default": { + "type": "sql_sqlite", + "db_path": "~/.llama/sql_store.db", + }, + }, + "stores": { + "metadata": { + "namespace": "registry", + "backend": "kv_default", + }, + "inference": { + "table_name": "inference_store", + "backend": "sql_default", + "max_write_queue_size": 10000, + "num_writers": 4, + }, + "conversations": { + "table_name": "openai_conversations", + "backend": "sql_default", + }, + }, + } + return config_dict diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index ef2bca7e3..66880ca36 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -56,9 +56,9 @@ class ConversationServiceImpl(Conversations): self.policy = config.policy # Use conversations store reference from run config - conversations_ref = config.run_config.conversations_store + conversations_ref = config.run_config.storage.stores.conversations if not conversations_ref: - raise ValueError("conversations_store must be configured in run config") + raise ValueError("storage.stores.conversations must be configured in run config") base_sql_store = sqlstore_impl(conversations_ref) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index a813b6084..d692da3b3 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -27,9 +27,7 @@ from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, KVStoreReference, - SqlStoreReference, StorageBackendType, StorageConfig, ) @@ -470,19 +468,7 @@ can be instantiated multiple times (with different configs) if necessary. 
""", ) storage: StorageConfig = Field( - description="Catalog of named storage backends available to the stack", - ) - metadata_store: KVStoreReference | None = Field( - default=None, - description="Reference to the KV store backend used by the distribution registry (kv_* backend).", - ) - inference_store: InferenceStoreReference | None = Field( - default=None, - description="Reference to the SQL store backend used by the inference API (sql_* backend).", - ) - conversations_store: SqlStoreReference | None = Field( - default=None, - description="Reference to the SQL store backend used by the conversations API (sql_* backend).", + description="Catalog of named storage backends and references available to the stack", ) # registry of "resources" in the distribution @@ -523,8 +509,9 @@ can be instantiated multiple times (with different configs) if necessary. return v @model_validator(mode="after") - def validate_storage_references(self) -> "StackRunConfig": - backend_map = self.storage.backends if self.storage else {} + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores kv_backends = { name for name, cfg in backend_map.items() @@ -558,9 +545,10 @@ can be instantiated multiple times (with different configs) if necessary. f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." ) - _ensure_backend(self.metadata_store, kv_backends, "metadata_store") - _ensure_backend(self.inference_store, sql_backends, "inference_store") - _ensure_backend(self.conversations_store, sql_backends, "conversations_store") + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") return self diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index ee8c42596..856397ca5 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -41,9 +41,9 @@ class PromptServiceImpl(Prompts): async def initialize(self) -> None: # Use metadata store backend with prompts-specific namespace - metadata_ref = self.config.run_config.metadata_store + metadata_ref = self.config.run_config.storage.stores.metadata if not metadata_ref: - raise ValueError("metadata_store must be configured in run config") + raise ValueError("storage.stores.metadata must be configured in run config") prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) self.kvstore = await kvstore_impl(prompts_ref) diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 62da054de..0573fc2c7 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -80,9 +80,9 @@ async def get_auto_router_impl( # TODO: move pass configs to routers instead if api == Api.inference: - inference_ref = run_config.inference_store + inference_ref = run_config.storage.stores.inference if not inference_ref: - raise ValueError("inference_store must be configured in run config") + raise ValueError("storage.stores.inference must be configured in run config") inference_store = InferenceStore( reference=inference_ref, diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 95e9a28b0..1222eff77 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -368,9 +368,10 @@ class 
Stack: logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") _initialize_storage(self.run_config) - if not self.run_config.metadata_store: - raise ValueError("metadata_store must be configured with a kv_* backend") - dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) + stores = self.run_config.storage.stores + if not stores.metadata: + raise ValueError("storage.stores.metadata must be configured with a kv_* backend") + dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name) policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] internal_impls = {} diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py index 4c3f1b99e..9df170e10 100644 --- a/llama_stack/core/storage/datatypes.py +++ b/llama_stack/core/storage/datatypes.py @@ -72,7 +72,7 @@ class SqliteKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig): type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES host: str = "localhost" - port: int = 5432 + port: int | str = 5432 db: str = "llamastack" user: str password: str | None = None @@ -175,7 +175,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES host: str = "localhost" - port: int = 5432 + port: int | str = 5432 db: str = "llamastack" user: str password: str | None = None @@ -254,7 +254,30 @@ class ResponsesStoreReference(InferenceStoreReference): """Responses store configuration with queue tuning.""" +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=None, + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=None, + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=None, + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + + class StorageConfig(BaseModel): backends: dict[str, StorageBackendConfig] = Field( description="Named backend configurations (e.g., 'default', 'cache')", ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index de950d1b3..3e6f20900 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -220,17 +220,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: 
llama-guard diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index a705afe94..a246755cc 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -97,17 +97,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index e1feb0c75..aa911c92f 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -93,17 +93,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 517886dba..c8426fe1b 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -110,17 +110,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 0133d5cd3..827d6d07f 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -100,17 +100,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - 
num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index ae705977b..984b86578 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -99,17 +99,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 65bc0ce9a..f3f4b73e4 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -88,17 +88,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index e15cbedd8..ddaf3688d 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -130,17 +130,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: gpt-4o diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index df979a8fe..7831b403d 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -75,17 +75,18 @@ storage: db: 
${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 357495368..f69ae2733 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -223,17 +223,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index cc5fc92c4..99c425e5f 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -220,17 +220,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index f7671719c..542c7bea9 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -188,6 +188,7 @@ class RunConfigSettings(BaseModel): default_benchmarks: list[BenchmarkInput] | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) storage_backends: dict[str, Any] | None = None + storage_stores: dict[str, Any] | None = None def run_config( self, @@ -241,19 +242,25 @@ class RunConfigSettings(BaseModel): ), } - storage_config = dict(backends=storage_backends) - metadata_store = KVStoreReference( - backend="kv_default", - namespace="registry", - ).model_dump(exclude_none=True) - inference_store = InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ).model_dump(exclude_none=True) - conversations_store = SqlStoreReference( - backend="sql_default", - 
table_name="openai_conversations", - ).model_dump(exclude_none=True) + storage_stores = self.storage_stores or { + "metadata": KVStoreReference( + backend="kv_default", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + } + + storage_config = dict( + backends=storage_backends, + stores=storage_stores, + ) # Return a dict that matches StackRunConfig structure return { @@ -263,9 +270,6 @@ class RunConfigSettings(BaseModel): "apis": apis, "providers": provider_configs, "storage": storage_config, - "metadata_store": metadata_store, - "inference_store": inference_store, - "conversations_store": conversations_store, "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], "vector_dbs": [], diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index 5a5343e88..f05f2d17c 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -103,17 +103,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py index dcc4cb1af..e9b80dc0c 100644 --- a/tests/integration/test_persistence_integration.py +++ b/tests/integration/test_persistence_integration.py @@ -29,19 +29,20 @@ def test_starter_distribution_config_loads_and_resolves(): assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig) assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig) - assert config.metadata_store is not None - assert config.metadata_store.backend == "kv_default" - assert config.metadata_store.namespace == "registry" + stores = config.storage.stores + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.metadata.namespace == "registry" - assert config.inference_store is not None - assert config.inference_store.backend == "sql_default" - assert config.inference_store.table_name == "inference_store" - assert config.inference_store.max_write_queue_size > 0 - assert config.inference_store.num_writers > 0 + assert stores.inference is not None + assert stores.inference.backend == "sql_default" + assert stores.inference.table_name == "inference_store" + assert stores.inference.max_write_queue_size > 0 + assert stores.inference.num_writers > 0 - assert config.conversations_store is not None - assert config.conversations_store.backend == "sql_default" - assert config.conversations_store.table_name == "openai_conversations" + assert stores.conversations is not None 
+ assert stores.conversations.backend == "sql_default" + assert stores.conversations.table_name == "openai_conversations" def test_postgres_demo_distribution_config_loads(): @@ -62,6 +63,9 @@ def test_postgres_demo_distribution_config_loads(): kv_backend = config.storage.backends["kv_default"] assert isinstance(kv_backend, PostgresKVStoreConfig) + stores = config.storage.stores # Stores target the Postgres backends explicitly - assert config.metadata_store.backend == "kv_default" - assert config.inference_store.backend == "sql_default" + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.inference is not None + assert stores.inference.backend == "sql_default" diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index daaf229e5..7b9f3ca0c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -23,6 +23,27 @@ def config_with_image_name_int(): image_name: 1234 apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 @@ -54,6 +75,27 @@ def up_to_date_config(): image_name: foo apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 65c3e2333..ff6dd243d 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + ServerStoresConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @pytest.fixture @@ -28,7 +35,18 @@ async def service(): with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_conversations.db" - config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[]) + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), + ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=[]) service = 
ConversationServiceImpl(config, {}) await service.initialize() yield service @@ -121,9 +139,18 @@ async def test_policy_configuration(): AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*")) ] - config = ConversationServiceConfig( - conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) await service.initialize() diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py index 206b90304..7bceba74d 100644 --- a/tests/unit/core/test_storage_references.py +++ b/tests/unit/core/test_storage_references.py @@ -16,6 +16,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -24,13 +25,30 @@ from llama_stack.core.storage.datatypes import ( def _base_run_config(**overrides): + metadata_reference = overrides.pop( + "metadata_reference", + KVStoreReference(backend="kv_default", namespace="registry"), + ) + inference_reference = overrides.pop( + "inference_reference", + InferenceStoreReference(backend="sql_default", table_name="inference"), + ) + conversations_reference = overrides.pop( + "conversations_reference", + SqlStoreReference(backend="sql_default", table_name="conversations"), + ) storage = overrides.pop( "storage", StorageConfig( backends={ "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"), "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"), - } + }, + stores=ServerStoresConfig( + metadata=metadata_reference, + inference=inference_reference, + conversations=conversations_reference, + ), ), ) return StackRunConfig( @@ -39,39 +57,28 @@ def _base_run_config(**overrides): apis=[], providers={}, storage=storage, - metadata_store=overrides.pop( - "metadata_store", - KVStoreReference(backend="kv_default", namespace="registry"), - ), - inference_store=overrides.pop( - "inference_store", - InferenceStoreReference(backend="sql_default", table_name="inference"), - ), - conversations_store=overrides.pop( - "conversations_store", - SqlStoreReference(backend="sql_default", table_name="conversations"), - ), **overrides, ) def test_references_require_known_backend(): with pytest.raises(ValidationError, match="unknown backend 'missing'"): - _base_run_config(metadata_store=KVStoreReference(backend="missing", namespace="registry")) + _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry")) def test_references_must_match_backend_family(): with pytest.raises(ValidationError, match="kv_.* is required"): - _base_run_config(metadata_store=KVStoreReference(backend="sql_default", namespace="registry")) + _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry")) with pytest.raises(ValidationError, match="sql_.* is required"): _base_run_config( - inference_store=InferenceStoreReference(backend="kv_default", 
table_name="inference"), + inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"), ) def test_valid_configuration_passes_validation(): config = _base_run_config() - assert config.metadata_store.backend == "kv_default" - assert config.inference_store.backend == "sql_default" - assert config.conversations_store.backend == "sql_default" + stores = config.storage.stores + assert stores.metadata is not None and stores.metadata.backend == "kv_default" + assert stores.inference is not None and stores.inference.backend == "sql_default" + assert stores.conversations is not None and stores.conversations.backend == "sql_default" diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 788585328..3b0643a13 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -16,6 +16,7 @@ from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -42,35 +43,25 @@ def _default_storage() -> StorageConfig: backends={ "kv_default": SqliteKVStoreConfig(db_path=":memory:"), "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), ) def make_stack_config(**overrides) -> StackRunConfig: storage = overrides.pop("storage", _default_storage()) - metadata_store = overrides.pop( - "metadata_store", - KVStoreReference(backend="kv_default", namespace="registry"), - ) - inference_store = overrides.pop( - "inference_store", - InferenceStoreReference(backend="sql_default", table_name="inference_store"), - ) - conversations_store = overrides.pop( - "conversations_store", - SqlStoreReference(backend="sql_default", table_name="conversations"), - ) defaults = dict( image_name="test_image", apis=[], providers={}, storage=storage, - metadata_store=metadata_store, - inference_store=inference_store, - conversations_store=conversations_store, ) defaults.update(overrides) - return make_stack_config(**defaults) + return StackRunConfig(**defaults) @pytest.fixture diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index 0fa52193b..fe30e1a77 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -12,6 +12,7 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceI from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -32,16 +33,18 @@ async def temp_prompt_store(tmp_path_factory): backends={ "kv_test": SqliteKVStoreConfig(db_path=db_path), "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_test", namespace="registry"), + inference=InferenceStoreReference(backend="sql_test", table_name="inference"), + conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + ), ) mock_run_config = StackRunConfig( image_name="test-distribution", apis=[], providers={}, 
storage=storage, - metadata_store=KVStoreReference(backend="kv_test", namespace="registry"), - inference_store=InferenceStoreReference(backend="sql_test", table_name="inference"), - conversations_store=SqlStoreReference(backend="sql_test", table_name="conversations"), ) config = PromptServiceConfig(run_config=mock_run_config) store = PromptServiceImpl(config, deps={}) diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index cfb3e1327..48fb7083a 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -26,6 +26,24 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register KV and SQL store backends for testing.""" + from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + kv_path = str(tmp_path / "test_kv.db") + sql_path = str(tmp_path / "test_sql.db") + + register_kvstore_backends({ + "kv_default": SqliteKVStoreConfig(db_path=kv_path) + }) + register_sqlstore_backends({ + "sql_default": SqliteSqlStoreConfig(db_path=sql_path) + }) + + @pytest.fixture def mock_apis(): return { @@ -40,15 +58,20 @@ def mock_apis(): @pytest.fixture def config(tmp_path): + from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig + return MetaReferenceAgentsImplConfig( - persistence_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, - responses_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, + persistence=AgentPersistenceConfig( + agent_state=KVStoreReference( + backend="kv_default", + namespace="agents", + ), + responses=ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ), + ) ) diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 5620c7fa5..b44f12f7e 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -19,12 +19,15 @@ from llama_stack.core.routing_tables.models import ModelsRoutingTable from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, StorageConfig, ) from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec +from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: @@ -76,26 +79,23 @@ def make_run_config(**overrides) -> StackRunConfig: backends={ "kv_default": SqliteKVStoreConfig(db_path=":memory:"), "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), ), ) + register_kvstore_backends({name: cfg 
for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")})
+    register_sqlstore_backends(
+        {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")}
+    )
     defaults = dict(
         image_name="test_image",
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=overrides.pop(
-            "metadata_store",
-            KVStoreReference(backend="kv_default", namespace="registry"),
-        ),
-        inference_store=overrides.pop(
-            "inference_store",
-            InferenceStoreReference(backend="sql_default", table_name="inference_store"),
-        ),
-        conversations_store=overrides.pop(
-            "conversations_store",
-            SqlStoreReference(backend="sql_default", table_name="conversations"),
-        ),
     )
     defaults.update(overrides)
     return StackRunConfig(**defaults)
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index f6d63490a..4f6ba817e 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -16,8 +16,18 @@ from llama_stack.apis.inference import (
     OpenAIUserMessageParam,
     Order,
 )
+from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+
+
+@pytest.fixture(autouse=True)
+def setup_backends(tmp_path):
+    """Register SQL store backends for testing."""
+    db_path = str(tmp_path / "test.db")
+    register_sqlstore_backends({
+        "sql_default": SqliteSqlStoreConfig(db_path=db_path)
+    })
 
 
 def create_test_chat_completion(
@@ -44,167 +54,162 @@ def create_test_chat_completion(
 
 async def test_inference_store_pagination_basic():
     """Test basic pagination functionality."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data with different timestamps
-        base_time = int(time.time())
-        test_data = [
-            ("zebra-task", base_time + 1),
-            ("apple-job", base_time + 2),
-            ("moon-work", base_time + 3),
-            ("banana-run", base_time + 4),
-            ("car-exec", base_time + 5),
-        ]
+    # Create test data with different timestamps
+    base_time = int(time.time())
+    test_data = [
+        ("zebra-task", base_time + 1),
+        ("apple-job", base_time + 2),
+        ("moon-work", base_time + 3),
+        ("banana-run", base_time + 4),
+        ("car-exec", base_time + 5),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test 1: First page with limit=2, descending order (default)
-        result = await store.list_chat_completions(limit=2, order=Order.desc)
-        assert len(result.data) == 2
-        assert result.data[0].id == "car-exec"  # Most recent first
-        assert result.data[1].id == "banana-run"
-        assert result.has_more is True
-        assert result.last_id == "banana-run"
+    # Test 1: First page with limit=2, descending order (default)
+    result = await store.list_chat_completions(limit=2, order=Order.desc)
+    assert len(result.data) == 2
+    assert result.data[0].id == "car-exec"  # Most recent first
+    assert result.data[1].id == "banana-run"
+    assert result.has_more is True
+    assert result.last_id == "banana-run"
 
-        # Test 2: Second page using 'after' parameter
-        result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
-        assert len(result2.data) == 2
-        assert result2.data[0].id == "moon-work"
-        assert result2.data[1].id == "apple-job"
-        assert result2.has_more is True
+    # Test 2: Second page using 'after' parameter
+    result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
+    assert len(result2.data) == 2
+    assert result2.data[0].id == "moon-work"
+    assert result2.data[1].id == "apple-job"
+    assert result2.has_more is True
 
-        # Test 3: Final page
-        result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
-        assert len(result3.data) == 1
-        assert result3.data[0].id == "zebra-task"
-        assert result3.has_more is False
+    # Test 3: Final page
+    result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
+    assert len(result3.data) == 1
+    assert result3.data[0].id == "zebra-task"
+    assert result3.has_more is False
 
 
 async def test_inference_store_pagination_ascending():
     """Test pagination with ascending order."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data
-        base_time = int(time.time())
-        test_data = [
-            ("delta-item", base_time + 1),
-            ("charlie-task", base_time + 2),
-            ("alpha-work", base_time + 3),
-        ]
+    # Create test data
+    base_time = int(time.time())
+    test_data = [
+        ("delta-item", base_time + 1),
+        ("charlie-task", base_time + 2),
+        ("alpha-work", base_time + 3),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test ascending order pagination
-        result = await store.list_chat_completions(limit=1, order=Order.asc)
-        assert len(result.data) == 1
-        assert result.data[0].id == "delta-item"  # Oldest first
-        assert result.has_more is True
+    # Test ascending order pagination
+    result = await store.list_chat_completions(limit=1, order=Order.asc)
+    assert len(result.data) == 1
+    assert result.data[0].id == "delta-item"  # Oldest first
+    assert result.has_more is True
 
-        # Second page with ascending order
-        result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
-        assert len(result2.data) == 1
-        assert result2.data[0].id == "charlie-task"
-        assert result2.has_more is True
+    # Second page with ascending order
+    result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
+    assert len(result2.data) == 1
+    assert result2.data[0].id == "charlie-task"
+    assert result2.has_more is True
 
 
 async def test_inference_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data with different models
-        base_time = int(time.time())
-        test_data = [
-            ("xyz-task", base_time + 1, "model-a"),
-            ("def-work", base_time + 2, "model-b"),
-            ("pqr-job", base_time + 3, "model-a"),
-            ("abc-run", base_time + 4, "model-b"),
-        ]
+    # Create test data with different models
+    base_time = int(time.time())
+    test_data = [
+        ("xyz-task", base_time + 1, "model-a"),
+        ("def-work", base_time + 2, "model-b"),
+        ("pqr-job", base_time + 3, "model-a"),
+        ("abc-run", base_time + 4, "model-b"),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp, model in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp, model)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp, model in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp, model)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test pagination with model filter
-        result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
-        assert len(result.data) == 1
-        assert result.data[0].id == "pqr-job"  # Most recent model-a
-        assert result.data[0].model == "model-a"
-        assert result.has_more is True
+    # Test pagination with model filter
+    result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
+    assert len(result.data) == 1
+    assert result.data[0].id == "pqr-job"  # Most recent model-a
+    assert result.data[0].model == "model-a"
+    assert result.has_more is True
 
-        # Second page with model filter
-        result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
-        assert len(result2.data) == 1
-        assert result2.data[0].id == "xyz-task"
-        assert result2.data[0].model == "model-a"
-        assert result2.has_more is False
+    # Second page with model filter
+    result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
+    assert len(result2.data) == 1
+    assert result2.data[0].id == "xyz-task"
+    assert result2.data[0].model == "model-a"
+    assert result2.has_more is False
 
 
 async def test_inference_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Try to paginate with non-existent ID
-        with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
-            await store.list_chat_completions(after="non-existent", limit=2)
+    # Try to paginate with non-existent ID
+    with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
+        await store.list_chat_completions(after="non-existent", limit=2)
 
 
 async def test_inference_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data
-        base_time = int(time.time())
-        test_data = [
-            ("omega-first", base_time + 1),
-            ("beta-second", base_time + 2),
-        ]
+    # Create test data
+    base_time = int(time.time())
+    test_data = [
+        ("omega-first", base_time + 1),
+        ("beta-second", base_time + 2),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test without limit
-        result = await store.list_chat_completions(order=Order.desc)
-        assert len(result.data) == 2
-        assert result.data[0].id == "beta-second"  # Most recent first
-        assert result.data[1].id == "omega-first"
-        assert result.has_more is False
+    # Test without limit
+    result = await store.list_chat_completions(order=Order.desc)
+    assert len(result.data) == 2
+    assert result.data[0].id == "beta-second"  # Most recent first
+    assert result.data[1].id == "omega-first"
+    assert result.has_more is False
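
Note for reviewers (illustrative, not part of the patch): the test changes above all
follow the same two-step wiring -- named backends are registered once per process,
and stores are then built from logical references instead of concrete store configs.
A minimal standalone sketch, assuming only the import paths and call signatures
visible in the hunks above; the `demo` coroutine and the database path are
hypothetical names chosen for the example:

    from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
    from llama_stack.providers.utils.inference.inference_store import InferenceStore
    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends

    async def demo() -> None:
        # Step 1: register a named SQL backend (in the tests, an autouse
        # fixture does this once for every test).
        register_sqlstore_backends(
            {"sql_default": SqliteSqlStoreConfig(db_path="/tmp/demo.db")}
        )

        # Step 2: construct the store from a reference; only the backend
        # name and table name appear here, not the storage details.
        reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
        store = InferenceStore(reference, policy=[])
        await store.initialize()

This mirrors the `setup_backends` fixture plus the per-test construction pattern.
The payoff of the indirection is that swapping SQLite for Postgres (as the k8s
run configs do) only changes the backend registration under the new `storage`
section; code holding an `InferenceStoreReference` is untouched.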