safety, chore: async inference store write

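# What does this PR do?

Schedules the inference store write (`store_chat_completion`) with `asyncio.create_task` instead of awaiting it inline in `InferenceRouter`.

## Test Plan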
2025-10-05 20:27:35 +00:00 · 2025-09-03 06:06:02 -07:00 · 2025-09-03 06:06:02 -07:00 · d0626ac535
commit d0626ac535
parent faf891b40c
2 changed files with 10 additions and 2 deletions
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -527,7 +527,7 @@ class InferenceRouter(Inference):

        # Store the response with the ID that will be returned to the client
        if self.store:
-            await self.store.store_chat_completion(response, messages)
+            asyncio.create_task(self.store.store_chat_completion(response, messages))

        if self.telemetry:
            metrics = self._construct_metrics(
@@ -855,4 +855,4 @@ class InferenceRouter(Inference):
                    object="chat.completion",
                )
                logger.debug(f"InferenceRouter.completion_response: {final_response}")
-                await self.store.store_chat_completion(final_response, messages)
+                asyncio.create_task(self.store.store_chat_completion(final_response, messages))