diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 49f643c26..a27c60d7f 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -22,6 +22,8 @@ on:
       - '.github/actions/setup-ollama/action.yml'
       - '.github/actions/setup-test-environment/action.yml'
       - '.github/actions/run-and-record-tests/action.yml'
+      - 'scripts/integration-tests.sh'
+      - 'scripts/generate_ci_matrix.py'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@@ -43,8 +45,31 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+
+      - name: Generate test matrix
+        id: set-matrix
+        env:
+          # Pass event values via the environment rather than interpolating them into the script
+          SCHEDULE: ${{ github.event.schedule }}
+          TEST_SETUP: ${{ github.event.inputs.test-setup }}
+        run: |
+          # Generate matrix from tests/integration/ci_matrix.json
+          # Supports schedule-based and manual input overrides
+          MATRIX=$(python3 scripts/generate_ci_matrix.py \
+            --schedule "$SCHEDULE" \
+            --test-setup "$TEST_SETUP")
+          echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
+          echo "Generated matrix: $MATRIX"
 
   run-replay-mode-tests:
+    needs: generate-matrix
     runs-on: ubuntu-latest
     name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
 
@@ -55,18 +80,9 @@ jobs:
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
-        # Define (setup, suite) pairs - they are always matched and cannot be independent
-        # Weekly schedule (Sun 1 AM): vllm+base
-        # Input test-setup=ollama-vision: ollama-vision+vision
-        # Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
-        config: >-
-          ${{
-            github.event.schedule == '1 0 * * 0'
-            && fromJSON('[{"setup": "vllm", "suite": "base"}]')
-            || github.event.inputs.test-setup == 'ollama-vision'
-            && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-            || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
-          }}
+        # Test configurations: generated from tests/integration/ci_matrix.json
+        # See scripts/generate_ci_matrix.py for generation logic
+        config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
 
     steps:
       - name: Checkout repository
diff --git a/scripts/cleanup_recordings.py b/scripts/cleanup_recordings.py
new file mode 100755
index 000000000..14f8cce84
--- /dev/null
+++ b/scripts/cleanup_recordings.py
@@ -0,0 +1,300 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Clean up unused test recordings based on CI test collection.
+
+This script:
+1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
+2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
+3. Compares against existing recordings to identify unused ones
+4. Optionally deletes unused recordings
+
+Usage:
+    # Dry run - see what would be deleted
+    ./scripts/cleanup_recordings.py
+
+    # Save manifest of CI test IDs for inspection
+    ./scripts/cleanup_recordings.py --manifest ci_tests.txt
+
+    # Actually delete unused recordings
+    ./scripts/cleanup_recordings.py --delete
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import tempfile
+from collections import defaultdict
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).parent.parent
+
+# Load CI matrix from JSON file
+CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
+with open(CI_MATRIX_FILE, encoding="utf-8") as f:
+    _matrix_config = json.load(f)
+
+DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
+SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})
+
+
+def _unique_configs(entries):
+    """Yield one {"suite", "setup"} dict per distinct (suite, setup) pair, in first-seen order."""
+    seen: set[tuple[str, str]] = set()
+    for entry in entries:
+        suite = entry["suite"]
+        setup = entry["setup"]
+        key = (suite, setup)
+        if key in seen:
+            continue
+        seen.add(key)
+        yield {"suite": suite, "setup": setup}
+
+
+def iter_all_ci_configs() -> list[dict[str, str]]:
+    """Return unique CI configs across default and scheduled matrices."""
+    combined = list(DEFAULT_CI_MATRIX)
+    for configs in SCHEDULED_MATRICES.values():
+        combined.extend(configs)
+    return list(_unique_configs(combined))
+
+
+def collect_ci_tests():
+    """Collect all test IDs that would run in CI using --collect-only with JSON output.
+
+    Returns:
+        Tuple of (set of collected pytest node IDs, list of CI configs that were collected).
+
+    Raises:
+        RuntimeError: If collection fails or its JSON report cannot be read for any config.
+            A partial ID set would make recordings that CI still uses look unused.
+    """
+
+    all_test_ids = set()
+    configs = iter_all_ci_configs()
+
+    for config in configs:
+        print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")
+
+        # Create a temporary file for JSON report
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            json_report_file = f.name
+
+        try:
+            # Configure environment for collection run
+            env = os.environ.copy()
+            env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}"
+            repo_path = str(REPO_ROOT)
+            existing_path = env.get("PYTHONPATH", "")
+            env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path
+
+            result = subprocess.run(
+                [
+                    "./scripts/integration-tests.sh",
+                    "--collect-only",
+                    "--suite",
+                    config["suite"],
+                    "--setup",
+                    config["setup"],
+                ],
+                capture_output=True,
+                text=True,
+                cwd=REPO_ROOT,
+                env=env,
+            )
+
+            if result.returncode != 0:
+                raise RuntimeError(
+                    "Test collection failed.\n"
+                    f"Command: {' '.join(result.args)}\n"
+                    f"stdout:\n{result.stdout}\n"
+                    f"stderr:\n{result.stderr}"
+                )
+
+            # Parse JSON report to extract test IDs
+            try:
+                with open(json_report_file, encoding="utf-8") as f:
+                    report = json.load(f)
+            except (json.JSONDecodeError, FileNotFoundError) as e:
+                # Fail loudly: skipping this config would drop its test IDs and let --delete
+                # remove recordings that CI still replays.
+                raise RuntimeError(
+                    f"Failed to parse JSON report for suite={config['suite']}, setup={config['setup']}: {e}"
+                ) from e
+
+            # The "collectors" field contains collected test items
+            # Each collector has a "result" array with test node IDs
+            for collector in report.get("collectors", []):
+                for item in collector.get("result", []):
+                    # The "nodeid" field is the test ID
+                    if "nodeid" in item:
+                        all_test_ids.add(item["nodeid"])
+
+            print(f" Collected {len(all_test_ids)} test IDs so far")
+
+        finally:
+            # Clean up temp file
+            if os.path.exists(json_report_file):
+                os.unlink(json_report_file)
+
+    print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
+    return all_test_ids, configs
+
+
+def get_base_test_id(test_id: str) -> str:
+    """Extract base test ID without parameterization.
+
+    Example:
+        'tests/integration/inference/test_foo.py::test_bar[param1-param2]'
+        -> 'tests/integration/inference/test_foo.py::test_bar'
+    """
+    # partition() returns the whole string as the head when no "[" is present
+    return test_id.partition("[")[0]
+
+
+def find_all_recordings():
+    """Find all recording JSON files."""
+    return list((REPO_ROOT / "tests/integration").rglob("recordings/*.json"))
+
+
+def analyze_recordings(ci_test_ids, dry_run=True):
+    """Analyze recordings and identify unused ones.
+
+    Args:
+        ci_test_ids: Collection of pytest node IDs that run in CI (used for membership tests).
+        dry_run: If True (default), only report; if False, delete the unused recordings.
+
+    Returns:
+        Number of recordings whose test_id is not in ci_test_ids.
+
+    Raises:
+        ValueError: If deletion is requested with an empty ci_test_ids, since every
+            recording that has a test_id would then be treated as unused.
+    """
+    if not dry_run and not ci_test_ids:
+        raise ValueError("Refusing to delete recordings: no CI test IDs were collected")
+
+    # Use full test IDs with parameterization for exact matching
+    all_recordings = find_all_recordings()
+    print(f"\nTotal recording files: {len(all_recordings)}")
+
+    # Categorize recordings
+    used_recordings = []
+    unused_recordings = []
+    shared_recordings = []  # model-list endpoints without test_id
+    parse_errors = []
+
+    for json_file in all_recordings:
+        try:
+            with open(json_file, encoding="utf-8") as f:
+                data = json.load(f)
+
+            test_id = data.get("test_id", "")
+
+            if not test_id:
+                # Shared/infrastructure recordings (model lists, etc)
+                shared_recordings.append(json_file)
+                continue
+
+            # Match exact test_id (with full parameterization)
+            if test_id in ci_test_ids:
+                used_recordings.append(json_file)
+            else:
+                unused_recordings.append((json_file, test_id))
+
+        except Exception as e:
+            # Unreadable or malformed recordings are reported below and never deleted
+            parse_errors.append((json_file, str(e)))
+
+    # Print summary
+    print("\nRecording Analysis:")
+    print(f" Used in CI: {len(used_recordings)}")
+    print(f" Shared (no ID): {len(shared_recordings)}")
+    print(f" UNUSED: {len(unused_recordings)}")
+    print(f" Parse errors: {len(parse_errors)}")
+
+    if unused_recordings:
+        print("\nUnused recordings by test:")
+
+        # Group by base test ID
+        by_test = defaultdict(list)
+        for file, test_id in unused_recordings:
+            base = get_base_test_id(test_id)
+            by_test[base].append(file)
+
+        for base_test, files in sorted(by_test.items()):
+            print(f"\n {base_test}")
+            print(f" ({len(files)} recording(s))")
+            for f in files[:3]:
+                print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}")
+            if len(files) > 3:
+                print(f" ... and {len(files) - 3} more")
+
+    if parse_errors:
+        print("\nParse errors:")
+        for file, error in parse_errors[:5]:
+            print(f" {file.relative_to(REPO_ROOT)}: {error}")
+        if len(parse_errors) > 5:
+            print(f" ... and {len(parse_errors) - 5} more")
+
+    # Perform cleanup
+    if not dry_run:
+        print(f"\nDeleting {len(unused_recordings)} unused recordings...")
+        for file, _ in unused_recordings:
+            file.unlink()
+            print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}")
+        print("✅ Cleanup complete")
+    else:
+        print("\n(Dry run - no files deleted)")
+        print("\nTo delete these files, run with --delete")
+
+    return len(unused_recordings)
+
+
+def main():
+    """Parse CLI arguments, collect CI test IDs, then report or delete unused recordings."""
+    parser = argparse.ArgumentParser(
+        description="Clean up unused test recordings based on CI test collection",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)")
+    parser.add_argument("--manifest", help="Save collected test IDs to file (optional)")
+
+    args = parser.parse_args()
+
+    print("=" * 60)
+    print("Recording Cleanup Utility")
+    print("=" * 60)
+
+    ci_configs = iter_all_ci_configs()
+
+    print(f"\nDetected CI configurations: {len(ci_configs)}")
+    for config in ci_configs:
+        print(f" - suite={config['suite']}, setup={config['setup']}")
+
+    # Collect test IDs from CI configurations
+    ci_test_ids, _ = collect_ci_tests()
+
+    if args.manifest:
+        with open(args.manifest, "w", encoding="utf-8") as f:
+            for test_id in sorted(ci_test_ids):
+                f.write(f"{test_id}\n")
+        print(f"\nSaved test IDs to: {args.manifest}")
+
+    # Analyze and cleanup
+    unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete)
+
+    print("\n" + "=" * 60)
+    if unused_count > 0 and not args.delete:
+        print("Run with --delete to remove unused recordings")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/generate_ci_matrix.py b/scripts/generate_ci_matrix.py
new file mode 100755
index 000000000..0d4e924b3
--- /dev/null
+++ b/scripts/generate_ci_matrix.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Generate CI test matrix from ci_matrix.json with schedule/input overrides.
+
+This script is used by .github/workflows/integration-tests.yml to generate
+the test matrix dynamically from tests/integration/ci_matrix.json.
+"""
+
+import json
+from pathlib import Path
+
+CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"
+
+with open(CI_MATRIX_FILE, encoding="utf-8") as f:
+    matrix_config = json.load(f)
+
+DEFAULT_MATRIX = matrix_config["default"]
+SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
+
+
+def generate_matrix(schedule: str = "", test_setup: str = "") -> str:
+    """
+    Generate test matrix based on schedule or manual input.
+
+    Args:
+        schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
+        test_setup: Manual test setup input (e.g., "ollama-vision")
+
+    Returns:
+        Matrix configuration as JSON string
+    """
+    # Scheduled runs take precedence: use the matrix registered for this cron string, if any
+    if schedule and schedule in SCHEDULE_MATRICES:
+        matrix = SCHEDULE_MATRICES[schedule]
+    # Manual input: only "ollama-vision" narrows the matrix; any other value uses the default
+    elif test_setup == "ollama-vision":
+        matrix = [{"suite": "vision", "setup": "ollama-vision"}]
+    # Default: use JSON-defined matrix
+    else:
+        matrix = DEFAULT_MATRIX
+
+    # The workflow reads the list back via fromJSON(...).include
+    return json.dumps({"include": matrix})
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Generate CI test matrix")
+    parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
+    parser.add_argument("--test-setup", default="", help="Manual test setup input")
+
+    args = parser.parse_args()
+
+    print(generate_matrix(args.schedule, args.test_setup))
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
new file mode 100644
index 000000000..ab42b2009
--- /dev/null
+++ b/tests/integration/ci_matrix.json
@@ -0,0 +1,11 @@
+{
+  "default": [
+    {"suite": "base", "setup": "ollama"},
+    {"suite": "vision", "setup": "ollama-vision"}
+  ],
+  "schedules": {
+    "1 0 * * 0": [
+      {"suite": "base", "setup": "vllm"}
+    ]
+  }
+}