mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 20:42:38 +00:00
fix: httpcore deadlock in CI by properly closing streaming responses (#4335)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
Integration Tests (Replay) / generate-matrix (push) Successful in 3s
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
Python Package Build Test / build (3.13) (push) Successful in 17s
Python Package Build Test / build (3.12) (push) Successful in 18s
Test External API and Providers / test-external (venv) (push) Failing after 21s
Vector IO Integration Tests / test-matrix (push) Failing after 33s
UI Tests / ui-tests (22) (push) Successful in 1m13s
Unit Tests / unit-tests (3.12) (push) Failing after 1m37s
Unit Tests / unit-tests (3.13) (push) Failing after 2m11s
Pre-commit / pre-commit (22) (push) Successful in 3m39s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4m1s
# What does this PR do?

The `test_conversation_error_handling` test was timing out in CI with a deadlock in httpcore's connection pool. The root cause was the preceding `test_conversation_multi_turn_and_streaming` test, which broke out of the streaming response iterator early without properly closing the underlying HTTP connection.

When a streaming response iterator is abandoned mid-stream, the HTTP connection remains in an incomplete state. Since the `openai_client` fixture is session-scoped, subsequent tests reuse the same httpcore connection pool, and the dangling connection causes the pool's internal lock to deadlock when the next test attempts to acquire a connection.

The fix wraps the streaming response in a context manager, which ensures the connection is properly closed when exiting the `with` block, even when breaking out of the loop early. This is a best practice when working with streaming HTTP responses that may not be fully consumed.

Signed-off-by: Sébastien Han <seb@redhat.com>
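The failure mode described above can be shown with a minimal sketch. The `FakeStream` class below is a hypothetical stand-in for a streaming HTTP response (it is not the llama-stack or openai-python code): breaking out of a bare `for` loop never releases the underlying connection, while a `with` block closes it even on an early `break`.

```python
class FakeStream:
    """Stands in for a streaming HTTP response whose connection must be closed."""

    def __init__(self, chunks):
        self._chunks = chunks
        self.closed = False

    def __iter__(self):
        return iter(self._chunks)

    def close(self):
        # In a real client this would return the connection to the pool.
        self.closed = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False


# Abandoning the iterator mid-stream leaves the "connection" open:
leaky = FakeStream(["chunk", "response.completed", "unread"])
for chunk in leaky:
    if chunk == "response.completed":
        break
print(leaky.closed)  # → False: the connection was never released

# The with-block closes it even when breaking out early:
managed = FakeStream(["chunk", "response.completed", "unread"])
with managed as stream:
    for chunk in stream:
        if chunk == "response.completed":
            break
print(managed.closed)  # → True: __exit__ ran close() on the early break
```

This is the same guarantee the PR relies on: the stream returned by the client implements the context-manager protocol, so `with ... as response_stream:` releases the connection regardless of how the loop terminates.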
This commit is contained in:
parent
20c11d8fd4
commit
d82a2cd6f8
1 changed file with 9 additions and 15 deletions
@@ -43,19 +43,18 @@ class TestConversationResponses:
             conversation=conversation.id,
         )

-        # Second turn with streaming
-        response_stream = openai_client.responses.create(
+        # Second turn with streaming - use context manager to ensure proper connection cleanup
+        with openai_client.responses.create(
             model=text_model_id,
             input=[{"role": "user", "content": "Say goodbye"}],
             conversation=conversation.id,
             stream=True,
-        )
-
-        final_response = None
-        for chunk in response_stream:
-            if chunk.type == "response.completed":
-                final_response = chunk.response
-                break
+        ) as response_stream:
+            final_response = None
+            for chunk in response_stream:
+                if chunk.type == "response.completed":
+                    final_response = chunk.response
+                    break

         assert response1.id != final_response.id
         assert len(response1.output_text.strip()) > 0

@@ -67,12 +66,7 @@ class TestConversationResponses:

     @pytest.mark.timeout(60, method="thread")
     def test_conversation_context_loading(self, openai_client, text_model_id):
-        """Test that conversation context is properly loaded for responses.
-
-        Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
-        after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
-        Investigation needed: connection pool exhaustion or event loop state issue.
-        """
+        """Test that conversation context is properly loaded for responses."""
        conversation = openai_client.conversations.create(
             items=[
                 {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},
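When a stream type does not implement the context-manager protocol itself, the standard library offers the same guarantee through `contextlib.closing`, which calls `close()` on exit. A minimal sketch (the `PlainStream` class is hypothetical, standing in for any object exposing `close()`):

```python
from contextlib import closing


class PlainStream:
    """A stream with close() but no __enter__/__exit__ of its own."""

    def __init__(self, chunks):
        self._chunks = chunks
        self.closed = False

    def __iter__(self):
        return iter(self._chunks)

    def close(self):
        self.closed = True


s = PlainStream([1, 2, 3])
# closing() wraps s so that s.close() runs on block exit, early break included.
with closing(s) as stream:
    for chunk in stream:
        if chunk == 2:
            break
print(s.closed)  # → True
```

Either form avoids the dangling-connection state the commit message describes; the choice depends only on whether the stream object already supports `with`.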