changes

2025-12-03 18:00:36 +00:00 · 2025-11-04 17:10:40 -08:00 · 2025-11-04 17:10:40 -08:00 · 8adf6d2fe5
commit 8adf6d2fe5
parent 02fd375965
5 changed files with 64 additions and 54 deletions
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@ -165,3 +165,14 @@ jobs:
            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
          fi
          exit $status
+
+      - name: Check if any unused recordings
+        run: |
+          set -e
+          PYTHONPATH=$PWD uv run ./scripts/cleanup_recordings.py --delete
+          changes=$(git status --short tests/integration | grep 'recordings' || true)
+          if [ -n "$changes" ]; then
+            echo "::error::Unused integration recordings detected. Run 'PYTHONPATH=$(pwd) uv run ./scripts/cleanup_recordings.py --delete' locally and commit the deletions."
+            echo "$changes"
+            exit 1
+          fi
--- a/scripts/cleanup_recordings.py
+++ b/scripts/cleanup_recordings.py
@ -9,7 +9,7 @@
 Clean up unused test recordings based on CI test collection.

 This script:
-1. Resolves the CI test matrix by combining the default CI_MATRIX with scheduled overrides
+1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
 2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
 3. Compares against existing recordings to identify unused ones
 4. Optionally deletes unused recordings
@ -34,45 +34,43 @@ from collections import defaultdict
 from pathlib import Path

 REPO_ROOT = Path(__file__).parent.parent
-from tests.integration.suites import CI_MATRIX  # noqa: E402

-# Additional scheduled CI configurations (keep in sync with scripts/generate_ci_matrix.py)
-ADDITIONAL_CI_CONFIGS = [
-    {"suite": "base", "setup": "vllm"},  # Weekly vLLM coverage
-]
+# Load CI matrix from JSON file
+CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
+with open(CI_MATRIX_FILE) as f:
+    _matrix_config = json.load(f)
+
+DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
+SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})


-def load_ci_configs() -> list[dict[str, str]]:
-    """Return all (suite, setup) combinations exercised in CI."""
-
-    configs: list[dict[str, str]] = []
+def _unique_configs(entries):
    seen: set[tuple[str, str]] = set()
-
-    def add(entry: dict[str, str]) -> None:
-        suite = entry.get("suite")
-        setup = entry.get("setup")
-        if not suite or not setup:
-            raise RuntimeError(f"Invalid CI matrix entry: {entry}")
+    for entry in entries:
+        suite = entry["suite"]
+        setup = entry["setup"]
        key = (suite, setup)
        if key in seen:
-            return
+            continue
        seen.add(key)
-        configs.append({"suite": suite, "setup": setup})
-
-    for entry in CI_MATRIX:
-        add(entry)
-    for entry in ADDITIONAL_CI_CONFIGS:
-        add(entry)
-
-    return configs
+        yield {"suite": suite, "setup": setup}


-def collect_ci_tests(ci_configs: list[dict[str, str]]):
+def iter_all_ci_configs() -> list[dict[str, str]]:
+    """Return unique CI configs across default and scheduled matrices."""
+    combined = list(DEFAULT_CI_MATRIX)
+    for configs in SCHEDULED_MATRICES.values():
+        combined.extend(configs)
+    return list(_unique_configs(combined))
+
+
+def collect_ci_tests():
    """Collect all test IDs that would run in CI using --collect-only with JSON output."""

    all_test_ids = set()
+    configs = iter_all_ci_configs()

-    for config in ci_configs:
+    for config in configs:
        print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")

        # Create a temporary file for JSON report
@ -135,7 +133,7 @@ def collect_ci_tests(ci_configs: list[dict[str, str]]):
                os.unlink(json_report_file)

    print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
-    return all_test_ids
+    return all_test_ids, configs


 def get_base_test_id(test_id: str) -> str:
@ -247,14 +245,14 @@ def main():
    print("Recording Cleanup Utility")
    print("=" * 60)

-    ci_configs = load_ci_configs()
+    ci_configs = iter_all_ci_configs()

    print(f"\nDetected CI configurations: {len(ci_configs)}")
    for config in ci_configs:
        print(f"  - suite={config['suite']}, setup={config['setup']}")

    # Collect test IDs from CI configurations
-    ci_test_ids = collect_ci_tests(ci_configs)
+    ci_test_ids, _ = collect_ci_tests()

    if args.manifest:
        with open(args.manifest, "w") as f:
--- a/scripts/generate_ci_matrix.py
+++ b/scripts/generate_ci_matrix.py
@ -6,20 +6,22 @@
 # the root directory of this source tree.

 """
-Generate CI test matrix from suites.py with schedule/input overrides.
+Generate CI test matrix from ci_matrix.json with schedule/input overrides.

 This script is used by .github/workflows/integration-tests.yml to generate
-the test matrix dynamically based on the CI_MATRIX definition in suites.py.
+the test matrix dynamically based on the matrices defined in tests/integration/ci_matrix.json.
 """

 import json
-import sys
 from pathlib import Path

-# Add tests/integration to path
-sys.path.insert(0, str(Path(__file__).parent.parent / "tests/integration"))
+CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"

-from suites import CI_MATRIX
+with open(CI_MATRIX_FILE) as f:
+    matrix_config = json.load(f)
+
+DEFAULT_MATRIX = matrix_config["default"]
+SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})


 def generate_matrix(schedule="", test_setup=""):
@ -33,15 +35,15 @@ def generate_matrix(schedule="", test_setup=""):
    Returns:
        Matrix configuration as JSON string
    """
-    # Weekly vllm test on Sunday
-    if schedule == "1 0 * * 0":
-        matrix = [{"suite": "base", "setup": "vllm"}]
+    # Scheduled test matrices, keyed by the triggering cron expression
+    if schedule and schedule in SCHEDULE_MATRICES:
+        matrix = SCHEDULE_MATRICES[schedule]
    # Manual input for specific setup
    elif test_setup == "ollama-vision":
        matrix = [{"suite": "vision", "setup": "ollama-vision"}]
-    # Default: use CI_MATRIX from suites.py
+    # Default: use JSON-defined matrix
    else:
-        matrix = CI_MATRIX
+        matrix = DEFAULT_MATRIX

    # GitHub Actions expects {"include": [...]} format
    return json.dumps({"include": matrix})
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@ -0,0 +1,12 @@
+{
+  "default": [
+    {"suite": "base", "setup": "ollama"},
+    {"suite": "vision", "setup": "ollama-vision"},
+    {"suite": "responses", "setup": "gpt"}
+  ],
+  "schedules": {
+    "1 0 * * 0": [
+      {"suite": "base", "setup": "vllm"}
+    ]
+  }
+}
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@ -180,16 +180,3 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
        default_setup="ollama-vision",
    ),
 }
-
-# CI test matrix - single source of truth for continuous integration test configurations
-# This is used by:
-# - .github/workflows/integration-tests.yml (CI jobs)
-# - scripts/cleanup_recordings.py (unused recording cleanup)
-#
-# Each entry defines a (suite, setup) pair that runs in CI.
-# Note: Special test configurations (vllm weekly tests, manual inputs) are handled in the CI workflow.
-CI_MATRIX = [
-    {"suite": "base", "setup": "ollama"},
-    {"suite": "vision", "setup": "ollama-vision"},
-    {"suite": "responses", "setup": "gpt"},
-]