fix: harden storage semantics (#4118)

Fixes issues in the storage system by guaranteeing immediate durability for responses and ensuring background writers stay alive.

Three related fixes:

* Responses to the OpenAI-compatible API are now written directly to Postgres/SQLite inside the request instead of detouring through an async queue that might never drain; this restores the expected read-after-write behavior and removes the "response not found" races reported by users.
* The access-control shim was stamping `owner_principal`/`access_attributes` as SQL NULL, which Postgres interprets as non-public rows; fixing it to use the empty-string/JSON-null pattern means conversations and responses stored without an authenticated user stay queryable (matching SQLite).
* The inference-store queue remains for batching, but its worker tasks now start lazily on the live event loop so server startup doesn't cancel them—writes keep flowing even when the stack is launched via `llama stack run`.

Closes #4115

Added a matrix entry to test our "base" suite against Postgres as the store.
2025-12-03 18:00:36 +00:00 · 2025-11-12 10:35:39 -08:00 · 2025-11-12 10:35:39 -08:00 · 81e44b06ff
commit 81e44b06ff
parent 56d87f5133
27 changed files with 1195 additions and 160 deletions
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -57,4 +57,5 @@ image_type: venv
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
--- a/llama_stack/distributions/starter/run-with-postgres-store.yaml
+++ b/llama_stack/distributions/starter/run-with-postgres-store.yaml
@@ -0,0 +1,293 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEDROCK_API_KEY:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -46,6 +46,9 @@ providers:
      api_key: ${env.TOGETHER_API_KEY:=}
  - provider_id: bedrock
    provider_type: remote::bedrock
+    config:
+      api_key: ${env.AWS_BEDROCK_API_KEY:=}
+      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
@@ -247,6 +250,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
  models: []
  shields:
@@ -274,3 +280,5 @@ vector_stores:
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -147,10 +147,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
            BuildProvider(provider_type="inline::reference"),
        ],
    }
+    files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
    files_provider = Provider(
        provider_id="meta-reference-files",
        provider_type="inline::localfs",
-        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+        config=files_config,
    )
    embedding_provider = Provider(
        provider_id="sentence-transformers",
@@ -180,6 +181,90 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
            provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
        ),
    ]
+    postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
+    postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
+    default_overrides = {
+        "inference": remote_inference_providers + [embedding_provider],
+        "vector_io": [
+            Provider(
+                provider_id="faiss",
+                provider_type="inline::faiss",
+                config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="sqlite-vec",
+                provider_type="inline::sqlite-vec",
+                config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="${env.MILVUS_URL:+milvus}",
+                provider_type="inline::milvus",
+                config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+            ),
+            Provider(
+                provider_id="${env.CHROMADB_URL:+chromadb}",
+                provider_type="remote::chromadb",
+                config=ChromaVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}/",
+                    url="${env.CHROMADB_URL:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.PGVECTOR_DB:+pgvector}",
+                provider_type="remote::pgvector",
+                config=PGVectorVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    db="${env.PGVECTOR_DB:=}",
+                    user="${env.PGVECTOR_USER:=}",
+                    password="${env.PGVECTOR_PASSWORD:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.QDRANT_URL:+qdrant}",
+                provider_type="remote::qdrant",
+                config=QdrantVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    url="${env.QDRANT_URL:=}",
+                ),
+            ),
+            Provider(
+                provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                provider_type="remote::weaviate",
+                config=WeaviateVectorIOConfig.sample_run_config(
+                    f"~/.llama/distributions/{name}",
+                    cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                ),
+            ),
+        ],
+        "files": [files_provider],
+    }
+
+    base_run_settings = RunConfigSettings(
+        provider_overrides=default_overrides,
+        default_models=[],
+        default_tool_groups=default_tool_groups,
+        default_shields=default_shields,
+        vector_stores_config=VectorStoresConfig(
+            default_provider_id="faiss",
+            default_embedding_model=QualifiedModel(
+                provider_id="sentence-transformers",
+                model_id="nomic-ai/nomic-embed-text-v1.5",
+            ),
+        ),
+        safety_config=SafetyConfig(
+            default_shield_id="llama-guard",
+        ),
+    )
+
+    postgres_run_settings = base_run_settings.model_copy(
+        update={
+            "storage_backends": {
+                "kv_default": postgres_kv_config,
+                "sql_default": postgres_sql_config,
+            }
+        },
+        deep=True,
+    )

    return DistributionTemplate(
        name=name,
@@ -190,73 +275,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
        providers=providers,
        additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
        run_configs={
-            "run.yaml": RunConfigSettings(
-                provider_overrides={
-                    "inference": remote_inference_providers + [embedding_provider],
-                    "vector_io": [
-                        Provider(
-                            provider_id="faiss",
-                            provider_type="inline::faiss",
-                            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="sqlite-vec",
-                            provider_type="inline::sqlite-vec",
-                            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="${env.MILVUS_URL:+milvus}",
-                            provider_type="inline::milvus",
-                            config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                        ),
-                        Provider(
-                            provider_id="${env.CHROMADB_URL:+chromadb}",
-                            provider_type="remote::chromadb",
-                            config=ChromaVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}/",
-                                url="${env.CHROMADB_URL:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.PGVECTOR_DB:+pgvector}",
-                            provider_type="remote::pgvector",
-                            config=PGVectorVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                db="${env.PGVECTOR_DB:=}",
-                                user="${env.PGVECTOR_USER:=}",
-                                password="${env.PGVECTOR_PASSWORD:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.QDRANT_URL:+qdrant}",
-                            provider_type="remote::qdrant",
-                            config=QdrantVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                url="${env.QDRANT_URL:=}",
-                            ),
-                        ),
-                        Provider(
-                            provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
-                            provider_type="remote::weaviate",
-                            config=WeaviateVectorIOConfig.sample_run_config(
-                                f"~/.llama/distributions/{name}",
-                                cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
-                            ),
-                        ),
-                    ],
-                    "files": [files_provider],
-                },
-                default_models=[],
-                default_tool_groups=default_tool_groups,
-                default_shields=default_shields,
-                vector_stores_config=VectorStoresConfig(
-                    default_provider_id="faiss",
-                    default_embedding_model=QualifiedModel(
-                        provider_id="sentence-transformers",
-                        model_id="nomic-ai/nomic-embed-text-v1.5",
-                    ),
-                ),
-            ),
+            "run.yaml": base_run_settings,
+            "run-with-postgres-store.yaml": postgres_run_settings,
        },
        run_config_env_vars={
            "LLAMA_STACK_PORT": (