fix: remove flush() call and disable write queues for SQLite to prevent deadlock

The flush() call was causing deadlocks because:
- SQLite's WAL mode allows only one writer at a time, even though it permits multiple concurrent readers
- Write queues with multiple workers create writer contention
- flush() waits for all queued writes while workers block on DB locks

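Solution: Disable write queues for SQLite and let WAL mode handle concurrency
directly. Writes then run synchronously, so each one has completed by the time
the call returns; this removes the visibility issue that flush() was added to
work around, without the risk of deadlock. Write queues stay enabled for other
backends such as Postgres.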
This commit is contained in:
Ashwin Bharambe 2025-11-03 15:21:46 -08:00
parent 09f38c9ce6
commit 3f79df2faa
3 changed files with 18 additions and 7 deletions

View file

@ -316,9 +316,6 @@ class OpenAIResponsesImpl:
if final_response is None: if final_response is None:
raise ValueError("The response stream never reached a terminal state") raise ValueError("The response stream never reached a terminal state")
# Flush any queued writes to ensure immediate visibility
await self.responses_store.flush()
return final_response return final_response
async def _create_streaming_response( async def _create_streaming_response(

View file

@ -16,12 +16,12 @@ from llama_stack.apis.inference import (
Order, Order,
) )
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule
from llama_stack.core.storage.datatypes import InferenceStoreReference from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType
from llama_stack.log import get_logger from llama_stack.log import get_logger
from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
from ..sqlstore.sqlstore import sqlstore_impl from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
logger = get_logger(name=__name__, category="inference") logger = get_logger(name=__name__, category="inference")
@ -48,6 +48,13 @@ class InferenceStore:
base_store = sqlstore_impl(self.reference) base_store = sqlstore_impl(self.reference)
self.sql_store = AuthorizedSqlStore(base_store, self.policy) self.sql_store = AuthorizedSqlStore(base_store, self.policy)
# Disable write queue for SQLite since WAL mode handles concurrency
# Keep it enabled for other backends (like Postgres) for performance
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
self.enable_write_queue = False
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
await self.sql_store.create_table( await self.sql_store.create_table(
"chat_completions", "chat_completions",
{ {

View file

@ -19,12 +19,12 @@ from llama_stack.apis.agents.openai_responses import (
) )
from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.core.datatypes import AccessRule from llama_stack.core.datatypes import AccessRule
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
from llama_stack.log import get_logger from llama_stack.log import get_logger
from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
from ..sqlstore.sqlstore import sqlstore_impl from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
logger = get_logger(name=__name__, category="openai_responses") logger = get_logger(name=__name__, category="openai_responses")
@ -70,6 +70,13 @@ class ResponsesStore:
base_store = sqlstore_impl(self.reference) base_store = sqlstore_impl(self.reference)
self.sql_store = AuthorizedSqlStore(base_store, self.policy) self.sql_store = AuthorizedSqlStore(base_store, self.policy)
# Disable write queue for SQLite since WAL mode handles concurrency
# Keep it enabled for other backends (like Postgres) for performance
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
self.enable_write_queue = False
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
await self.sql_store.create_table( await self.sql_store.create_table(
"openai_responses", "openai_responses",
{ {