feat(tests): implement test isolation for inference recordings (#3681)

Uses test_id in request hashes and test-scoped subdirectories to prevent
cross-test contamination. Model list endpoints exclude test_id to enable
merging recordings from different servers.

Additionally, this PR adds a `record-if-missing` mode (which we will use
instead of `record` which records everything) which is very useful.

🤖 Co-authored with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-04 11:34:18 -07:00 committed by GitHub
parent f176196fba
commit 045a0c1d57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
428 changed files with 85345 additions and 104330 deletions

View file

@ -131,10 +131,6 @@ class TestInferenceRecording:
temp_storage_dir = temp_storage_dir / "test_response_storage"
storage = ResponseStorage(temp_storage_dir)
# Test directory creation
assert storage.test_dir.exists()
assert storage.responses_dir.exists()
# Test storing and retrieving a recording
request_hash = "test_hash_123"
request_data = {
@ -174,7 +170,8 @@ class TestInferenceRecording:
# Verify recording was stored
storage = ResponseStorage(temp_storage_dir)
assert storage.responses_dir.exists()
dir = storage._get_test_dir()
assert dir.exists()
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that replay mode returns stored responses without making real calls."""