safety, chore: async inference store write

# What does this PR do?


## Test Plan
# What does this PR do?


## Test Plan
This commit is contained in:
Eric Huang 2025-09-03 06:06:02 -07:00
parent faf891b40c
commit d0626ac535
2 changed files with 10 additions and 2 deletions

View file

@@ -527,7 +527,7 @@ class InferenceRouter(Inference):
 # Store the response with the ID that will be returned to the client
 if self.store:
-await self.store.store_chat_completion(response, messages)
+asyncio.create_task(self.store.store_chat_completion(response, messages))
 if self.telemetry:
 metrics = self._construct_metrics(
@@ -855,4 +855,4 @@ class InferenceRouter(Inference):
 object="chat.completion",
 )
 logger.debug(f"InferenceRouter.completion_response: {final_response}")
-await self.store.store_chat_completion(final_response, messages)
+asyncio.create_task(self.store.store_chat_completion(final_response, messages))