fix(responses): sync conversation before yielding terminal events in streaming (#3888)

Move the conversation sync logic before the `yield` so that it executes even when a streaming consumer breaks out early after receiving the `response.completed` event. When a consumer stops iterating, the async generator is never resumed past the `yield`, so any code placed after it does not run.

## Test Plan

```
OLLAMA_URL=http://localhost:11434 \
  pytest -sv tests/integration/responses/ \
  --stack-config server:ci-tests \
  --text-model ollama/llama3.2:3b-instruct-fp16 \
  --inference-mode live \
  -k conversation_multi
```

This test now passes.
2025-12-10 19:43:16 +00:00 · 2025-10-22 14:31:12 -07:00 · 2025-10-22 14:31:12 -07:00 · 30ba8c8655
commit 30ba8c8655
parent cb2185b936
3 changed files with 11 additions and 4 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -372,14 +372,13 @@ class OpenAIResponsesImpl:
                final_response = stream_chunk.response
            elif stream_chunk.type == "response.failed":
                failed_response = stream_chunk.response
-            yield stream_chunk

            if stream_chunk.type == "response.output_item.done":
                item = stream_chunk.item
                output_items.append(item)

-            # Store and sync immediately after yielding terminal events
-            # This ensures the storage/syncing happens even if the consumer breaks early
+            # Store and sync before yielding terminal events
+            # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
            if (
                stream_chunk.type in {"response.completed", "response.incomplete"}
                and final_response
@@ -400,6 +399,8 @@ class OpenAIResponsesImpl:
                    await self._sync_response_to_conversation(conversation, input, output_items)
                    await self.responses_store.store_conversation_messages(conversation, messages_to_store)

+            yield stream_chunk
+
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        return await self.responses_store.delete_response_object(response_id)