mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-11 19:56:03 +00:00
fix: keep write queues enabled, flush before returning non-streaming responses
Keep write queues enabled for all backends, for both simplicity and performance. WAL mode handles SQLite concurrency without locking. In non-streaming mode, flush queued writes before returning so that callers who expect synchronous behavior see the written data immediately.
This commit is contained in:
parent
a63e0a84d3
commit
554d958931
3 changed files with 3 additions and 14 deletions
|
|
@@ -316,6 +316,9 @@ class OpenAIResponsesImpl:
|
||||||
|
|
||||||
if final_response is None:
|
if final_response is None:
|
||||||
raise ValueError("The response stream never reached a terminal state")
|
raise ValueError("The response stream never reached a terminal state")
|
||||||
|
|
||||||
|
# Flush any queued writes to ensure immediate visibility
|
||||||
|
await self.responses_store.flush()
|
||||||
return final_response
|
return final_response
|
||||||
|
|
||||||
async def _create_streaming_response(
|
async def _create_streaming_response(
|
||||||
|
|
|
||||||
|
|
@@ -48,13 +48,6 @@ class InferenceStore:
|
||||||
base_store = sqlstore_impl(self.reference)
|
base_store = sqlstore_impl(self.reference)
|
||||||
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
||||||
|
|
||||||
# Disable write queue for SQLite since WAL mode handles concurrency
|
|
||||||
# Keep it enabled for other backends (like Postgres) for performance
|
|
||||||
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
|
|
||||||
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
|
|
||||||
self.enable_write_queue = False
|
|
||||||
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
|
|
||||||
|
|
||||||
await self.sql_store.create_table(
|
await self.sql_store.create_table(
|
||||||
"chat_completions",
|
"chat_completions",
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@@ -70,13 +70,6 @@ class ResponsesStore:
|
||||||
base_store = sqlstore_impl(self.reference)
|
base_store = sqlstore_impl(self.reference)
|
||||||
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
||||||
|
|
||||||
# Disable write queue for SQLite since WAL mode handles concurrency
|
|
||||||
# Keep it enabled for other backends (like Postgres) for performance
|
|
||||||
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
|
|
||||||
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
|
|
||||||
self.enable_write_queue = False
|
|
||||||
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
|
|
||||||
|
|
||||||
await self.sql_store.create_table(
|
await self.sql_store.create_table(
|
||||||
"openai_responses",
|
"openai_responses",
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue