From 8adf6d2fe561d6315b4aa243262515836687d2ca Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 4 Nov 2025 17:10:40 -0800 Subject: [PATCH] Move CI matrix to ci_matrix.json and add unused-recordings check --- .github/workflows/pre-commit.yml | 11 ++++++ scripts/cleanup_recordings.py | 58 +++++++++++++++----------------- scripts/generate_ci_matrix.py | 24 +++++++------ tests/integration/ci_matrix.json | 12 +++++++ tests/integration/suites.py | 13 ------- 5 files changed, 64 insertions(+), 54 deletions(-) create mode 100644 tests/integration/ci_matrix.json diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index a187cbd1c..74f7da19a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -165,3 +165,14 @@ jobs: echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'." fi exit $status + + - name: Check if any unused recordings + run: | + set -e + PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete + changes=$(git status --short tests/integration | grep 'recordings' || true) + if [ -n "$changes" ]; then + echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions." + echo "$changes" + exit 1 + fi diff --git a/scripts/cleanup_recordings.py b/scripts/cleanup_recordings.py index e0f62d1a2..14f8cce84 100755 --- a/scripts/cleanup_recordings.py +++ b/scripts/cleanup_recordings.py @@ -9,7 +9,7 @@ Clean up unused test recordings based on CI test collection. This script: -1. Resolves the CI test matrix by combining the default CI_MATRIX with scheduled overrides +1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides) 2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI 3. Compares against existing recordings to identify unused ones 4. 
Optionally deletes unused recordings @@ -34,45 +34,43 @@ from collections import defaultdict from pathlib import Path REPO_ROOT = Path(__file__).parent.parent -from tests.integration.suites import CI_MATRIX # noqa: E402 -# Additional scheduled CI configurations (keep in sync with scripts/generate_ci_matrix.py) -ADDITIONAL_CI_CONFIGS = [ - {"suite": "base", "setup": "vllm"}, # Weekly vLLM coverage -] +# Load CI matrix from JSON file +CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json" +with open(CI_MATRIX_FILE) as f: + _matrix_config = json.load(f) + +DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"] +SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {}) -def load_ci_configs() -> list[dict[str, str]]: - """Return all (suite, setup) combinations exercised in CI.""" - - configs: list[dict[str, str]] = [] +def _unique_configs(entries): seen: set[tuple[str, str]] = set() - - def add(entry: dict[str, str]) -> None: - suite = entry.get("suite") - setup = entry.get("setup") - if not suite or not setup: - raise RuntimeError(f"Invalid CI matrix entry: {entry}") + for entry in entries: + suite = entry["suite"] + setup = entry["setup"] key = (suite, setup) if key in seen: - return + continue seen.add(key) - configs.append({"suite": suite, "setup": setup}) - - for entry in CI_MATRIX: - add(entry) - for entry in ADDITIONAL_CI_CONFIGS: - add(entry) - - return configs + yield {"suite": suite, "setup": setup} -def collect_ci_tests(ci_configs: list[dict[str, str]]): +def iter_all_ci_configs() -> list[dict[str, str]]: + """Return unique CI configs across default and scheduled matrices.""" + combined = list(DEFAULT_CI_MATRIX) + for configs in SCHEDULED_MATRICES.values(): + combined.extend(configs) + return list(_unique_configs(combined)) + + +def collect_ci_tests(): """Collect all test IDs that would run in CI using --collect-only with JSON output.""" all_test_ids = set() + configs = iter_all_ci_configs() - for config 
in ci_configs: + for config in configs: print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...") # Create a temporary file for JSON report @@ -135,7 +133,7 @@ def collect_ci_tests(ci_configs: list[dict[str, str]]): os.unlink(json_report_file) print(f"\nTotal unique test IDs collected: {len(all_test_ids)}") - return all_test_ids + return all_test_ids, configs def get_base_test_id(test_id: str) -> str: @@ -247,14 +245,14 @@ def main(): print("Recording Cleanup Utility") print("=" * 60) - ci_configs = load_ci_configs() + ci_configs = iter_all_ci_configs() print(f"\nDetected CI configurations: {len(ci_configs)}") for config in ci_configs: print(f" - suite={config['suite']}, setup={config['setup']}") # Collect test IDs from CI configurations - ci_test_ids = collect_ci_tests(ci_configs) + ci_test_ids, _ = collect_ci_tests() if args.manifest: with open(args.manifest, "w") as f: diff --git a/scripts/generate_ci_matrix.py b/scripts/generate_ci_matrix.py index 57afd6113..0d4e924b3 100755 --- a/scripts/generate_ci_matrix.py +++ b/scripts/generate_ci_matrix.py @@ -6,20 +6,22 @@ # the root directory of this source tree. """ -Generate CI test matrix from suites.py with schedule/input overrides. +Generate CI test matrix from ci_matrix.json with schedule/input overrides. This script is used by .github/workflows/integration-tests.yml to generate -the test matrix dynamically based on the CI_MATRIX definition in suites.py. +the test matrix dynamically based on the matrices defined in tests/integration/ci_matrix.json. 
""" import json -import sys from pathlib import Path -# Add tests/integration to path -sys.path.insert(0, str(Path(__file__).parent.parent / "tests/integration")) +CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json" -from suites import CI_MATRIX +with open(CI_MATRIX_FILE) as f: + matrix_config = json.load(f) + +DEFAULT_MATRIX = matrix_config["default"] +SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {}) def generate_matrix(schedule="", test_setup=""): @@ -33,15 +35,15 @@ def generate_matrix(schedule="", test_setup=""): Returns: Matrix configuration as JSON string """ - # Weekly vllm test on Sunday - if schedule == "1 0 * * 0": - matrix = [{"suite": "base", "setup": "vllm"}] + # Weekly scheduled test matrices + if schedule and schedule in SCHEDULE_MATRICES: + matrix = SCHEDULE_MATRICES[schedule] # Manual input for specific setup elif test_setup == "ollama-vision": matrix = [{"suite": "vision", "setup": "ollama-vision"}] - # Default: use CI_MATRIX from suites.py + # Default: use JSON-defined matrix else: - matrix = CI_MATRIX + matrix = DEFAULT_MATRIX # GitHub Actions expects {"include": [...]} format return json.dumps({"include": matrix}) diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json new file mode 100644 index 000000000..314070eab --- /dev/null +++ b/tests/integration/ci_matrix.json @@ -0,0 +1,12 @@ +{ + "default": [ + {"suite": "base", "setup": "ollama"}, + {"suite": "vision", "setup": "ollama-vision"}, + {"suite": "responses", "setup": "gpt"} + ], + "schedules": { + "1 0 * * 0": [ + {"suite": "base", "setup": "vllm"} + ] + } +} diff --git a/tests/integration/suites.py b/tests/integration/suites.py index c2780325d..e1fb6a1c7 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -180,16 +180,3 @@ SUITE_DEFINITIONS: dict[str, Suite] = { default_setup="ollama-vision", ), } - -# CI test matrix - single source of truth for continuous integration 
test configurations -# This is used by: -# - .github/workflows/integration-tests.yml (CI jobs) -# - scripts/cleanup_recordings.py (unused recording cleanup) -# -# Each entry defines a (suite, setup) pair that runs in CI. -# Note: Special test configurations (vllm weekly tests, manual inputs) are handled in the CI workflow. -CI_MATRIX = [ - {"suite": "base", "setup": "ollama"}, - {"suite": "vision", "setup": "ollama-vision"}, - {"suite": "responses", "setup": "gpt"}, -]