mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 18:00:36 +00:00
Fixes race condition causing "database is locked" errors during concurrent writes to SQLite, particularly in streaming responses with guardrails where multiple inference calls write simultaneously. Enable Write-Ahead Logging (WAL) mode for SQLite which allows multiple concurrent readers and one writer without blocking. Set busy_timeout to 5s so SQLite retries instead of failing immediately. Remove the logic that disabled write queues for SQLite since WAL mode eliminates the locking issues that prompted disabling them. Fixes: test_output_safety_guardrails_safe_content[stream=True] flake<hr>This is an automatic backport of pull request #4048 done by [Mergify](https://mergify.com). Signed-off-by: Charlie Doern <cdoern@redhat.com> Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
46bd95e453
commit
b9299a20ed
3 changed files with 65 additions and 11 deletions
|
|
@ -35,6 +35,7 @@ class InferenceStore:
|
|||
self.reference = reference
|
||||
self.sql_store = None
|
||||
self.policy = policy
|
||||
self.enable_write_queue = True
|
||||
|
||||
# Async write queue and worker control
|
||||
self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None
|
||||
|
|
@ -47,14 +48,13 @@ class InferenceStore:
|
|||
base_store = sqlstore_impl(self.reference)
|
||||
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
||||
|
||||
# Disable write queue for SQLite to avoid concurrency issues
|
||||
backend_name = self.reference.backend
|
||||
backend_config = _SQLSTORE_BACKENDS.get(backend_name)
|
||||
if backend_config is None:
|
||||
raise ValueError(
|
||||
f"Unregistered SQL backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
|
||||
)
|
||||
self.enable_write_queue = backend_config.type != StorageBackendType.SQL_SQLITE
|
||||
# Disable write queue for SQLite since WAL mode handles concurrency
|
||||
# Keep it enabled for other backends (like Postgres) for performance
|
||||
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
|
||||
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
|
||||
self.enable_write_queue = False
|
||||
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
|
||||
|
||||
await self.sql_store.create_table(
|
||||
"chat_completions",
|
||||
{
|
||||
|
|
@ -66,6 +66,14 @@ class InferenceStore:
|
|||
},
|
||||
)
|
||||
|
||||
if self.enable_write_queue:
|
||||
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
||||
for _ in range(self._num_writers):
|
||||
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
|
||||
logger.debug(
|
||||
f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
if not self._worker_tasks:
|
||||
return
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue