fix(responses): sync conversation before yielding terminal events in streaming (#3888)

Move the conversation sync logic before the yield to ensure it executes even
when streaming consumers break early after receiving the response.completed
event.

## Test Plan

```
OLLAMA_URL=http://localhost:11434 \
  pytest -sv tests/integration/responses/ \
  --stack-config server:ci-tests \
  --text-model ollama/llama3.2:3b-instruct-fp16 \
  --inference-mode live \
  -k conversation_multi
```

This test now passes.
This commit is contained in:
Ashwin Bharambe 2025-10-22 14:31:12 -07:00 committed by GitHub
parent cb2185b936
commit 30ba8c8655
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 11 additions and 4 deletions

View file

@@ -40,7 +40,12 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config."""
cmd = f"uv run llama stack run {config_name}"
# remove server.log if it exists
if os.path.exists("server.log"):
os.remove("server.log")
cmd = f"llama stack run {config_name}"
devnull = open(os.devnull, "w")
process = subprocess.Popen(
shlex.split(cmd),