fix(responses): sync conversation before yielding terminal events in streaming (#3888)

Move the conversation sync logic before the yield so that it executes even
when streaming consumers break early after receiving the
`response.completed` event.
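
For context, here is a minimal standalone sketch (hypothetical names, not code from this repository) of the failure mode: when a consumer breaks out of an `async for` loop, the generator is closed at its suspended `yield`, so any sync/storage work placed after that yield never runs.

```python
import asyncio


async def events():
    # Stand-in for the response stream generator.
    yield "response.completed"
    # Old ordering: sync/storage lived here, after the terminal yield.
    # A consumer that breaks after the terminal event never resumes the
    # generator, so this line is skipped.
    print("sync conversation (never reached)")


async def main():
    async for event in events():
        if event == "response.completed":
            break  # stop as soon as the terminal event arrives


asyncio.run(main())
# Prints nothing: the post-yield sync never executed.
```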

## Test Plan

```
OLLAMA_URL=http://localhost:11434 \
  pytest -sv tests/integration/responses/ \
  --stack-config server:ci-tests \
  --text-model ollama/llama3.2:3b-instruct-fp16 \
  --inference-mode live \
  -k conversation_multi
```

This test now passes.
Ashwin Bharambe, 2025-10-22 14:31:12 -07:00 (committed by GitHub)
parent cb2185b936
commit 30ba8c8655
3 changed files with 11 additions and 4 deletions

```diff
@@ -372,14 +372,13 @@ class OpenAIResponsesImpl:
                 final_response = stream_chunk.response
             elif stream_chunk.type == "response.failed":
                 failed_response = stream_chunk.response
-            yield stream_chunk
             if stream_chunk.type == "response.output_item.done":
                 item = stream_chunk.item
                 output_items.append(item)
-            # Store and sync immediately after yielding terminal events
-            # This ensures the storage/syncing happens even if the consumer breaks early
+            # Store and sync before yielding terminal events
+            # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
             if (
                 stream_chunk.type in {"response.completed", "response.incomplete"}
                 and final_response
@@ -400,6 +399,8 @@ class OpenAIResponsesImpl:
                     await self._sync_response_to_conversation(conversation, input, output_items)
                     await self.responses_store.store_conversation_messages(conversation, messages_to_store)

+            yield stream_chunk
+
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
```
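
With the fix applied, the same early-breaking consumer can no longer skip the sync, since it happens before the terminal event is yielded. A minimal sketch of that guarantee (again with hypothetical names, not code from this repository):

```python
import asyncio

synced = False


async def events():
    global synced
    synced = True  # new ordering: sync/storage runs before the yield
    yield "response.completed"


async def main():
    async for event in events():
        if event == "response.completed":
            break  # consumer still breaks early

    assert synced  # ...but the sync already happened


asyncio.run(main())
```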