feat(tests): implement test isolation for inference recordings (#3681)

This change uses test_id in request hashes and test-scoped subdirectories to prevent cross-test contamination. Model list endpoints exclude test_id so that recordings from different servers can be merged. Additionally, this PR adds a very useful `record-if-missing` mode, which we will use instead of `record` (which records everything). 🤖 Co-authored with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude <noreply@anthropic.com>
2025-12-03 09:53:45 +00:00 · 2025-10-04 11:34:18 -07:00 · 2025-10-04 11:34:18 -07:00 · 045a0c1d57
commit 045a0c1d57
parent f176196fba
428 changed files with 85345 additions and 104330 deletions
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@ -36,6 +36,24 @@ def pytest_sessionstart(session):
        os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"


+@pytest.fixture(autouse=True)
+def _track_test_context(request):
+    """Automatically track current test context for isolated recordings.
+
+    This fixture runs for every test and stores the test's nodeid in a contextvar
+    that the recording system can access to determine which subdirectory to use.
+    """
+    from llama_stack.testing.inference_recorder import _test_context
+
+    # Store the test nodeid (e.g., "tests/integration/responses/test_basic.py::test_foo[params]")
+    token = _test_context.set(request.node.nodeid)
+
+    yield
+
+    # Teardown: reset the contextvar to the value it held before this test set its nodeid
+    _test_context.reset(token)
+
+
 def pytest_runtest_teardown(item):
    # Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
    outcome = getattr(item, "execution_outcome", None)
@ -137,8 +155,8 @@ def pytest_addoption(parser):

    parser.addoption(
        "--inference-mode",
-        help="Inference mode: { record, replay, live } (default: replay)",
-        choices=["record", "replay", "live"],
+        help="Inference mode: { record, replay, live, record-if-missing } (default: replay)",
+        choices=["record", "replay", "live", "record-if-missing"],
        default="replay",
    )
    parser.addoption(