fix: keep write queues enabled, flush before returning non-streaming responses

Keep write queues enabled for all backends, for simplicity and performance.
The SQLite-specific opt-out is removed: WAL mode already lets SQLite readers
and a writer proceed concurrently without blocking each other. In
non-streaming mode, flush queued writes before returning so that the stored
response is immediately visible to callers who expect synchronous behavior.
This commit is contained in:
Ashwin Bharambe 2025-11-03 14:34:23 -08:00
parent a63e0a84d3
commit 554d958931
3 changed files with 3 additions and 14 deletions

View file

@@ -316,6 +316,9 @@ class OpenAIResponsesImpl:
if final_response is None:
raise ValueError("The response stream never reached a terminal state")
# Flush any queued writes to ensure immediate visibility
await self.responses_store.flush()
return final_response
async def _create_streaming_response(

View file

@@ -48,13 +48,6 @@ class InferenceStore:
base_store = sqlstore_impl(self.reference)
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
# Disable write queue for SQLite since WAL mode handles concurrency
# Keep it enabled for other backends (like Postgres) for performance
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
self.enable_write_queue = False
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
await self.sql_store.create_table(
"chat_completions",
{

View file

@@ -70,13 +70,6 @@ class ResponsesStore:
base_store = sqlstore_impl(self.reference)
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
# Disable write queue for SQLite since WAL mode handles concurrency
# Keep it enabled for other backends (like Postgres) for performance
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
self.enable_write_queue = False
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
await self.sql_store.create_table(
"openai_responses",
{