fix(responses): sync conversation before yielding terminal events in streaming (#3888)

Move the conversation sync logic before the yield to ensure it executes even
when streaming consumers break early after receiving the response.completed
event.

## Test Plan

```
OLLAMA_URL=http://localhost:11434 \
  pytest -sv tests/integration/responses/ \
  --stack-config server:ci-tests \
  --text-model ollama/llama3.2:3b-instruct-fp16 \
  --inference-mode live \
  -k conversation_multi
```

This test now passes.
This commit is contained in:
Ashwin Bharambe 2025-10-22 14:31:12 -07:00 committed by GitHub
parent cb2185b936
commit 30ba8c8655
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 11 additions and 4 deletions

View file

@@ -40,7 +40,12 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config."""
cmd = f"uv run llama stack run {config_name}"
# remove server.log if it exists
if os.path.exists("server.log"):
os.remove("server.log")
cmd = f"llama stack run {config_name}"
devnull = open(os.devnull, "w")
process = subprocess.Popen(
shlex.split(cmd),