mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
Added a script to cleanup recordings. While doing this, moved the CI matrix generation to a separate script so there is a single source of truth for the matrix. Ran the cleanup script as: ``` PYTHONPATH=. python scripts/cleanup_recordings.py ``` Also added this as part of the pre-commit workflow to ensure that the recordings are always up to date and that no stale recordings are left in the repo. <hr>This is an automatic backport of pull request #4074 done by [Mergify](https://mergify.com). --------- Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
0d525d9a24
commit
56d87f5133
4 changed files with 363 additions and 12 deletions
36
.github/workflows/integration-tests.yml
vendored
36
.github/workflows/integration-tests.yml
vendored
|
|
@ -22,6 +22,8 @@ on:
|
||||||
- '.github/actions/setup-ollama/action.yml'
|
- '.github/actions/setup-ollama/action.yml'
|
||||||
- '.github/actions/setup-test-environment/action.yml'
|
- '.github/actions/setup-test-environment/action.yml'
|
||||||
- '.github/actions/run-and-record-tests/action.yml'
|
- '.github/actions/run-and-record-tests/action.yml'
|
||||||
|
- 'scripts/integration-tests.sh'
|
||||||
|
- 'scripts/generate_ci_matrix.py'
|
||||||
schedule:
|
schedule:
|
||||||
# If changing the cron schedule, update the provider in the test-matrix job
|
# If changing the cron schedule, update the provider in the test-matrix job
|
||||||
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
||||||
|
|
@ -43,8 +45,27 @@ concurrency:
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
generate-matrix:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
|
|
||||||
|
- name: Generate test matrix
|
||||||
|
id: set-matrix
|
||||||
|
run: |
|
||||||
|
# Generate matrix from CI_MATRIX in tests/integration/suites.py
|
||||||
|
# Supports schedule-based and manual input overrides
|
||||||
|
MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
|
||||||
|
--schedule "${{ github.event.schedule }}" \
|
||||||
|
--test-setup "${{ github.event.inputs.test-setup }}")
|
||||||
|
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
|
||||||
|
echo "Generated matrix: $MATRIX"
|
||||||
|
|
||||||
run-replay-mode-tests:
|
run-replay-mode-tests:
|
||||||
|
needs: generate-matrix
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
|
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
|
||||||
|
|
||||||
|
|
@ -55,18 +76,9 @@ jobs:
|
||||||
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
||||||
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
||||||
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||||
# Define (setup, suite) pairs - they are always matched and cannot be independent
|
# Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
|
||||||
# Weekly schedule (Sun 1 AM): vllm+base
|
# See scripts/generate_ci_matrix.py for generation logic
|
||||||
# Input test-setup=ollama-vision: ollama-vision+vision
|
config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
|
||||||
# Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
|
|
||||||
config: >-
|
|
||||||
${{
|
|
||||||
github.event.schedule == '1 0 * * 0'
|
|
||||||
&& fromJSON('[{"setup": "vllm", "suite": "base"}]')
|
|
||||||
|| github.event.inputs.test-setup == 'ollama-vision'
|
|
||||||
&& fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
|
|
||||||
|| fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
|
|
||||||
}}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|
|
||||||
272
scripts/cleanup_recordings.py
Executable file
272
scripts/cleanup_recordings.py
Executable file
|
|
@ -0,0 +1,272 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Clean up unused test recordings based on CI test collection.
|
||||||
|
|
||||||
|
This script:
|
||||||
|
1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
|
||||||
|
2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
|
||||||
|
3. Compares against existing recordings to identify unused ones
|
||||||
|
4. Optionally deletes unused recordings
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry run - see what would be deleted
|
||||||
|
./scripts/cleanup_recordings.py
|
||||||
|
|
||||||
|
# Save manifest of CI test IDs for inspection
|
||||||
|
./scripts/cleanup_recordings.py --manifest ci_tests.txt
|
||||||
|
|
||||||
|
# Actually delete unused recordings
|
||||||
|
./scripts/cleanup_recordings.py --delete
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Repository root, derived from this script's location (scripts/ -> repo root).
REPO_ROOT = Path(__file__).parent.parent

# Load CI matrix from JSON file
# NOTE: this runs at import time; a missing/invalid ci_matrix.json fails fast here.
CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
    _matrix_config = json.load(f)

# Matrix entries are {"suite": ..., "setup": ...} dicts (see tests/integration/ci_matrix.json).
DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
# Optional cron-string -> matrix overrides used by scheduled CI runs; empty if absent.
SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})
|
||||||
|
def _unique_configs(entries):
|
||||||
|
seen: set[tuple[str, str]] = set()
|
||||||
|
for entry in entries:
|
||||||
|
suite = entry["suite"]
|
||||||
|
setup = entry["setup"]
|
||||||
|
key = (suite, setup)
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
yield {"suite": suite, "setup": setup}
|
||||||
|
|
||||||
|
|
||||||
|
def iter_all_ci_configs() -> list[dict[str, str]]:
    """Return unique CI configs across default and scheduled matrices."""
    # Flatten the default matrix plus every schedule-specific override,
    # then de-duplicate while keeping first-seen order.
    combined = [*DEFAULT_CI_MATRIX]
    for scheduled in SCHEDULED_MATRICES.values():
        combined += scheduled
    return list(_unique_configs(combined))
|
||||||
|
|
||||||
|
|
||||||
|
def collect_ci_tests():
    """Collect all test IDs that would run in CI using --collect-only with JSON output.

    For each unique (suite, setup) CI config, runs scripts/integration-tests.sh
    in collect-only mode with pytest-json-report enabled, then parses the report
    to gather the fully parameterized pytest node IDs.

    Returns:
        Tuple of (set of collected test node IDs, list of CI configs used).

    Raises:
        RuntimeError: if the collection subprocess exits non-zero for any config.
    """

    all_test_ids = set()
    configs = iter_all_ci_configs()

    for config in configs:
        print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")

        # Create a temporary file for JSON report
        # delete=False so the subprocess can write to it after this handle closes.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
            json_report_file = f.name

        try:
            # Configure environment for collection run
            env = os.environ.copy()
            # pytest-json-report picks these up and writes the collection result here.
            env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}"
            repo_path = str(REPO_ROOT)
            existing_path = env.get("PYTHONPATH", "")
            # Prepend the repo root so in-repo packages resolve during collection.
            env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path

            result = subprocess.run(
                [
                    "./scripts/integration-tests.sh",
                    "--collect-only",
                    "--suite",
                    config["suite"],
                    "--setup",
                    config["setup"],
                ],
                capture_output=True,
                text=True,
                cwd=REPO_ROOT,
                env=env,
            )

            if result.returncode != 0:
                # Fail hard: a broken collection would make every recording look unused.
                raise RuntimeError(
                    "Test collection failed.\n"
                    f"Command: {' '.join(result.args)}\n"
                    f"stdout:\n{result.stdout}\n"
                    f"stderr:\n{result.stderr}"
                )

            # Parse JSON report to extract test IDs
            try:
                with open(json_report_file) as f:
                    report = json.load(f)

                # The "collectors" field contains collected test items
                # Each collector has a "result" array with test node IDs
                for collector in report.get("collectors", []):
                    for item in collector.get("result", []):
                        # The "nodeid" field is the test ID
                        if "nodeid" in item:
                            all_test_ids.add(item["nodeid"])

                print(f" Collected {len(all_test_ids)} test IDs so far")

            except (json.JSONDecodeError, FileNotFoundError) as e:
                # Best-effort: a bad or missing report for one config should not
                # abort collection for the remaining configs.
                print(f" Warning: Failed to parse JSON report: {e}")
                continue

        finally:
            # Clean up temp file (runs even on the `continue` / raise paths above).
            if os.path.exists(json_report_file):
                os.unlink(json_report_file)

    print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
    return all_test_ids, configs
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_test_id(test_id: str) -> str:
    """Extract base test ID without parameterization.

    Example:
        'tests/integration/inference/test_foo.py::test_bar[param1-param2]'
        -> 'tests/integration/inference/test_foo.py::test_bar'
    """
    # str.partition returns the whole string as the first element when the
    # separator is absent, so the original `"[" in test_id` guard was redundant.
    return test_id.partition("[")[0]
|
||||||
|
|
||||||
|
|
||||||
|
def find_all_recordings():
    """Return every recording JSON file under tests/integration."""
    integration_dir = REPO_ROOT / "tests" / "integration"
    recording_files = integration_dir.rglob("recordings/*.json")
    return list(recording_files)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_recordings(ci_test_ids, dry_run=True):
    """Analyze recordings and identify unused ones.

    Args:
        ci_test_ids: set of fully parameterized pytest node IDs that run in CI.
        dry_run: when True (default), only report; when False, delete unused files.

    Returns:
        Number of unused recordings found.
    """

    # Use full test IDs with parameterization for exact matching
    all_recordings = find_all_recordings()
    print(f"\nTotal recording files: {len(all_recordings)}")

    # Categorize recordings
    used_recordings = []
    unused_recordings = []  # (path, test_id) pairs
    shared_recordings = []  # model-list endpoints without test_id
    parse_errors = []  # (path, error message) pairs

    for json_file in all_recordings:
        try:
            with open(json_file) as f:
                data = json.load(f)

            test_id = data.get("test_id", "")

            if not test_id:
                # Shared/infrastructure recordings (model lists, etc)
                # are never deleted — they are not tied to a single test.
                shared_recordings.append(json_file)
                continue

            # Match exact test_id (with full parameterization)
            if test_id in ci_test_ids:
                used_recordings.append(json_file)
            else:
                unused_recordings.append((json_file, test_id))

        # Broad catch is deliberate: one corrupt file must not abort the scan.
        except Exception as e:
            parse_errors.append((json_file, str(e)))

    # Print summary
    print("\nRecording Analysis:")
    print(f" Used in CI: {len(used_recordings)}")
    print(f" Shared (no ID): {len(shared_recordings)}")
    print(f" UNUSED: {len(unused_recordings)}")
    print(f" Parse errors: {len(parse_errors)}")

    if unused_recordings:
        print("\nUnused recordings by test:")

        # Group by base test ID (parameterization stripped) for readable output.
        by_test = defaultdict(list)
        for file, test_id in unused_recordings:
            base = get_base_test_id(test_id)
            by_test[base].append(file)

        for base_test, files in sorted(by_test.items()):
            print(f"\n {base_test}")
            print(f" ({len(files)} recording(s))")
            # Show at most 3 example paths per test to keep output compact.
            for f in files[:3]:
                print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}")
            if len(files) > 3:
                print(f" ... and {len(files) - 3} more")

    if parse_errors:
        print("\nParse errors:")
        for file, error in parse_errors[:5]:
            print(f" {file.relative_to(REPO_ROOT)}: {error}")
        if len(parse_errors) > 5:
            print(f" ... and {len(parse_errors) - 5} more")

    # Perform cleanup
    if not dry_run:
        print(f"\nDeleting {len(unused_recordings)} unused recordings...")
        for file, _ in unused_recordings:
            file.unlink()
            print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}")
        print("✅ Cleanup complete")
    else:
        print("\n(Dry run - no files deleted)")
        print("\nTo delete these files, run with --delete")

    return len(unused_recordings)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: collect CI test IDs, then report (or delete) unused recordings."""
    parser = argparse.ArgumentParser(
        description="Clean up unused test recordings based on CI test collection",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Reuse the module docstring (usage examples) as the --help epilog.
        epilog=__doc__,
    )
    parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)")
    parser.add_argument("--manifest", help="Save collected test IDs to file (optional)")

    args = parser.parse_args()

    print("=" * 60)
    print("Recording Cleanup Utility")
    print("=" * 60)

    ci_configs = iter_all_ci_configs()

    print(f"\nDetected CI configurations: {len(ci_configs)}")
    for config in ci_configs:
        print(f" - suite={config['suite']}, setup={config['setup']}")

    # Collect test IDs from CI configurations
    ci_test_ids, _ = collect_ci_tests()

    if args.manifest:
        # One test ID per line, sorted for stable diffs between runs.
        with open(args.manifest, "w") as f:
            for test_id in sorted(ci_test_ids):
                f.write(f"{test_id}\n")
        print(f"\nSaved test IDs to: {args.manifest}")

    # Analyze and cleanup (dry-run unless --delete was passed)
    unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete)

    print("\n" + "=" * 60)
    if unused_count > 0 and not args.delete:
        print("Run with --delete to remove unused recordings")


if __name__ == "__main__":
    main()
|
||||||
61
scripts/generate_ci_matrix.py
Executable file
61
scripts/generate_ci_matrix.py
Executable file
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Generate CI test matrix from ci_matrix.json with schedule/input overrides.
|
||||||
|
|
||||||
|
This script is used by .github/workflows/integration-tests.yml to generate
|
||||||
|
the test matrix dynamically based on the CI_MATRIX definition.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Single source of truth for the CI test matrix (shared with cleanup_recordings.py).
CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"

# NOTE: runs at import time; a missing/invalid ci_matrix.json fails fast here.
with open(CI_MATRIX_FILE) as f:
    matrix_config = json.load(f)

# Matrix entries are {"suite": ..., "setup": ...} dicts.
DEFAULT_MATRIX = matrix_config["default"]
# Optional cron-string -> matrix overrides for scheduled runs; empty if absent.
SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
|
||||||
|
|
||||||
|
|
||||||
|
def generate_matrix(schedule="", test_setup=""):
    """
    Generate test matrix based on schedule or manual input.

    Args:
        schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
        test_setup: Manual test setup input (e.g., "ollama-vision")

    Returns:
        Matrix configuration as JSON string
    """
    # Precedence: schedule-specific override first, then the manual
    # "ollama-vision" input, then the JSON-defined default matrix.
    if schedule and schedule in SCHEDULE_MATRICES:
        selected = SCHEDULE_MATRICES[schedule]
    elif test_setup == "ollama-vision":
        selected = [{"suite": "vision", "setup": "ollama-vision"}]
    else:
        selected = DEFAULT_MATRIX

    # GitHub Actions consumes matrices in {"include": [...]} form.
    return json.dumps({"include": selected})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate CI test matrix")
    parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
    parser.add_argument("--test-setup", default="", help="Manual test setup input")

    args = parser.parse_args()

    # Print to stdout so the workflow step can capture it into $GITHUB_OUTPUT.
    print(generate_matrix(args.schedule, args.test_setup))
|
||||||
6
tests/integration/ci_matrix.json
Normal file
6
tests/integration/ci_matrix.json
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"default": [
|
||||||
|
{"suite": "base", "setup": "ollama"},
|
||||||
|
{"suite": "vision", "setup": "ollama-vision"}
|
||||||
|
]
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue