feat(tests): implement test isolation for inference recordings

Uses test_id in request hashes and test-scoped subdirectories to prevent cross-test contamination. Model list endpoints exclude test_id to enable merging recordings from different servers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-03 20:05:24 -07:00
parent f176196fba
commit 7008a70a6e
303 changed files with 40627 additions and 105140 deletions

View file

@ -36,6 +36,24 @@ def pytest_sessionstart(session):
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = "replay"
@pytest.fixture(autouse=True)
def _track_test_context(request):
    """Expose the running test's node ID to the inference recorder.

    As an autouse fixture this wraps every test: before the test body runs,
    the test's nodeid is placed in the ``_test_context`` contextvar so the
    recording system can determine which subdirectory to use. After the test
    finishes, the contextvar is reset using the token obtained when it was set.
    """
    from llama_stack.testing.inference_recorder import _test_context

    # A nodeid looks like "tests/integration/responses/test_basic.py::test_foo[params]".
    current_nodeid = request.node.nodeid
    reset_token = _test_context.set(current_nodeid)

    yield

    # Teardown: undo the set() above so this test's ID is not left behind.
    _test_context.reset(reset_token)
def pytest_runtest_teardown(item):
# Check if the test actually ran and passed or failed, but was not skipped or an expected failure (xfail)
outcome = getattr(item, "execution_outcome", None)
@ -137,8 +155,8 @@ def pytest_addoption(parser):
parser.addoption(
"--inference-mode",
help="Inference mode: { record, replay, live } (default: replay)",
choices=["record", "replay", "live"],
help="Inference mode: { record, replay, live, record-if-missing } (default: replay)",
choices=["record", "replay", "live", "record-if-missing"],
default="replay",
)
parser.addoption(