mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 13:52:37 +00:00
feat(responses): implement usage tracking in streaming responses
Implementation changes:
- Add usage accumulation to StreamingResponseOrchestrator
- Enable stream_options to receive usage in streaming chunks
- Track usage across multi-turn responses with tool execution
- Convert between chat completion and response usage formats
- Extract usage accumulation into a helper method for clarity

Test changes:
- Add usage assertions to streaming and non-streaming tests
- Update test recordings with actual usage data from OpenAI
This commit is contained in:
parent
548ccff368
commit
cfb86f1178
10 changed files with 3485 additions and 612 deletions
|
|
@ -23,6 +23,14 @@ def test_response_non_streaming_basic(compat_client, text_model_id, case):
|
|||
assert len(output_text) > 0
|
||||
assert case.expected.lower() in output_text
|
||||
|
||||
# Verify usage is reported
|
||||
assert response.usage is not None, "Response should include usage information"
|
||||
assert response.usage.input_tokens > 0, "Input tokens should be greater than 0"
|
||||
assert response.usage.output_tokens > 0, "Output tokens should be greater than 0"
|
||||
assert response.usage.total_tokens == response.usage.input_tokens + response.usage.output_tokens, (
|
||||
"Total tokens should equal input + output tokens"
|
||||
)
|
||||
|
||||
retrieved_response = compat_client.responses.retrieve(response_id=response.id)
|
||||
assert retrieved_response.output_text == response.output_text
|
||||
|
||||
|
|
@ -73,6 +81,15 @@ def test_response_streaming_basic(compat_client, text_model_id, case):
|
|||
assert len(output_text) > 0, "Response should have content"
|
||||
assert case.expected.lower() in output_text, f"Expected '{case.expected}' in response"
|
||||
|
||||
# Verify usage is reported in final response
|
||||
assert chunk.response.usage is not None, "Completed response should include usage information"
|
||||
assert chunk.response.usage.input_tokens > 0, "Input tokens should be greater than 0"
|
||||
assert chunk.response.usage.output_tokens > 0, "Output tokens should be greater than 0"
|
||||
assert (
|
||||
chunk.response.usage.total_tokens
|
||||
== chunk.response.usage.input_tokens + chunk.response.usage.output_tokens
|
||||
), "Total tokens should equal input + output tokens"
|
||||
|
||||
# Use validator for common checks
|
||||
validator = StreamingValidator(events)
|
||||
validator.assert_basic_event_sequence()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue