mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
Added a script to cleanup recordings. While doing this, moved the CI matrix generation to a separate script so there is a single source of truth for the matrix. Ran the cleanup script as: ``` PYTHONPATH=. python scripts/cleanup_recordings.py ``` Also added this as part of the pre-commit workflow to ensure that the recordings are always up to date and that no stale recordings are left in the repo. <hr>This is an automatic backport of pull request #4074 done by [Mergify](https://mergify.com). --------- Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
0d525d9a24
commit
56d87f5133
4 changed files with 363 additions and 12 deletions
36
.github/workflows/integration-tests.yml
vendored
36
.github/workflows/integration-tests.yml
vendored
|
|
@ -22,6 +22,8 @@ on:
|
||||||
- '.github/actions/setup-ollama/action.yml'
|
- '.github/actions/setup-ollama/action.yml'
|
||||||
- '.github/actions/setup-test-environment/action.yml'
|
- '.github/actions/setup-test-environment/action.yml'
|
||||||
- '.github/actions/run-and-record-tests/action.yml'
|
- '.github/actions/run-and-record-tests/action.yml'
|
||||||
|
- 'scripts/integration-tests.sh'
|
||||||
|
- 'scripts/generate_ci_matrix.py'
|
||||||
schedule:
|
schedule:
|
||||||
# If changing the cron schedule, update the provider in the test-matrix job
|
# If changing the cron schedule, update the provider in the test-matrix job
|
||||||
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
||||||
|
|
@ -43,8 +45,27 @@ concurrency:
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
generate-matrix:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
|
|
||||||
|
- name: Generate test matrix
|
||||||
|
id: set-matrix
|
||||||
|
run: |
|
||||||
|
# Generate matrix from CI_MATRIX in tests/integration/suites.py
|
||||||
|
# Supports schedule-based and manual input overrides
|
||||||
|
MATRIX=$(PYTHONPATH=. python3 scripts/generate_ci_matrix.py \
|
||||||
|
--schedule "${{ github.event.schedule }}" \
|
||||||
|
--test-setup "${{ github.event.inputs.test-setup }}")
|
||||||
|
echo "matrix=$MATRIX" >> $GITHUB_OUTPUT
|
||||||
|
echo "Generated matrix: $MATRIX"
|
||||||
|
|
||||||
run-replay-mode-tests:
|
run-replay-mode-tests:
|
||||||
|
needs: generate-matrix
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
|
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
|
||||||
|
|
||||||
|
|
@ -55,18 +76,9 @@ jobs:
|
||||||
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
||||||
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
||||||
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||||
# Define (setup, suite) pairs - they are always matched and cannot be independent
|
# Test configurations: Generated from CI_MATRIX in tests/integration/suites.py
|
||||||
# Weekly schedule (Sun 1 AM): vllm+base
|
# See scripts/generate_ci_matrix.py for generation logic
|
||||||
# Input test-setup=ollama-vision: ollama-vision+vision
|
config: ${{ fromJSON(needs.generate-matrix.outputs.matrix).include }}
|
||||||
# Default (including test-setup=ollama): ollama+base, ollama-vision+vision, gpt+responses
|
|
||||||
config: >-
|
|
||||||
${{
|
|
||||||
github.event.schedule == '1 0 * * 0'
|
|
||||||
&& fromJSON('[{"setup": "vllm", "suite": "base"}]')
|
|
||||||
|| github.event.inputs.test-setup == 'ollama-vision'
|
|
||||||
&& fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
|
|
||||||
|| fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
|
|
||||||
}}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|
|
||||||
272
scripts/cleanup_recordings.py
Executable file
272
scripts/cleanup_recordings.py
Executable file
|
|
@ -0,0 +1,272 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Clean up unused test recordings based on CI test collection.
|
||||||
|
|
||||||
|
This script:
|
||||||
|
1. Reads CI matrix definitions from tests/integration/ci_matrix.json (default + scheduled overrides)
|
||||||
|
2. Uses pytest --collect-only with --json-report to gather all test IDs that run in CI
|
||||||
|
3. Compares against existing recordings to identify unused ones
|
||||||
|
4. Optionally deletes unused recordings
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# Dry run - see what would be deleted
|
||||||
|
./scripts/cleanup_recordings.py
|
||||||
|
|
||||||
|
# Save manifest of CI test IDs for inspection
|
||||||
|
./scripts/cleanup_recordings.py --manifest ci_tests.txt
|
||||||
|
|
||||||
|
# Actually delete unused recordings
|
||||||
|
./scripts/cleanup_recordings.py --delete
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Repository root, derived from this script's location (scripts/ -> repo root).
REPO_ROOT = Path(__file__).parent.parent

# Load CI matrix from JSON file
# NOTE: this runs at import time; a missing/invalid ci_matrix.json fails fast here.
CI_MATRIX_FILE = REPO_ROOT / "tests/integration/ci_matrix.json"
with open(CI_MATRIX_FILE) as f:
    _matrix_config = json.load(f)

# Matrix entries are {"suite": ..., "setup": ...} dicts (see tests/integration/ci_matrix.json).
DEFAULT_CI_MATRIX: list[dict[str, str]] = _matrix_config["default"]
# Optional cron-string -> matrix overrides used by scheduled CI runs; empty if absent.
SCHEDULED_MATRICES: dict[str, list[dict[str, str]]] = _matrix_config.get("schedules", {})
|
||||||
|
def _unique_configs(entries):
|
||||||
|
seen: set[tuple[str, str]] = set()
|
||||||
|
for entry in entries:
|
||||||
|
suite = entry["suite"]
|
||||||
|
setup = entry["setup"]
|
||||||
|
key = (suite, setup)
|
||||||
|
if key in seen:
|
||||||
|
continue
|
||||||
|
seen.add(key)
|
||||||
|
yield {"suite": suite, "setup": setup}
|
||||||
|
|
||||||
|
|
||||||
|
def iter_all_ci_configs() -> list[dict[str, str]]:
    """Return unique CI configs across default and scheduled matrices."""
    # Flatten the default matrix plus every schedule-specific override,
    # then de-duplicate while keeping first-seen order.
    combined = [*DEFAULT_CI_MATRIX]
    for scheduled in SCHEDULED_MATRICES.values():
        combined += scheduled
    return list(_unique_configs(combined))
|
||||||
|
|
||||||
|
|
||||||
|
def collect_ci_tests():
    """Collect all test IDs that would run in CI using --collect-only with JSON output.

    For each unique (suite, setup) CI config, runs scripts/integration-tests.sh
    in collect-only mode with pytest-json-report enabled, then parses the report
    to gather the fully parameterized pytest node IDs.

    Returns:
        Tuple of (set of collected test node IDs, list of CI configs used).

    Raises:
        RuntimeError: if the collection subprocess exits non-zero for any config.
    """

    all_test_ids = set()
    configs = iter_all_ci_configs()

    for config in configs:
        print(f"Collecting tests for suite={config['suite']}, setup={config['setup']}...")

        # Create a temporary file for JSON report
        # delete=False so the subprocess can write to it after this handle closes.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
            json_report_file = f.name

        try:
            # Configure environment for collection run
            env = os.environ.copy()
            # pytest-json-report picks these up and writes the collection result here.
            env["PYTEST_ADDOPTS"] = f"--json-report --json-report-file={json_report_file}"
            repo_path = str(REPO_ROOT)
            existing_path = env.get("PYTHONPATH", "")
            # Prepend the repo root so in-repo packages resolve during collection.
            env["PYTHONPATH"] = f"{repo_path}{os.pathsep}{existing_path}" if existing_path else repo_path

            result = subprocess.run(
                [
                    "./scripts/integration-tests.sh",
                    "--collect-only",
                    "--suite",
                    config["suite"],
                    "--setup",
                    config["setup"],
                ],
                capture_output=True,
                text=True,
                cwd=REPO_ROOT,
                env=env,
            )

            if result.returncode != 0:
                # Fail hard: a broken collection would make every recording look unused.
                raise RuntimeError(
                    "Test collection failed.\n"
                    f"Command: {' '.join(result.args)}\n"
                    f"stdout:\n{result.stdout}\n"
                    f"stderr:\n{result.stderr}"
                )

            # Parse JSON report to extract test IDs
            try:
                with open(json_report_file) as f:
                    report = json.load(f)

                # The "collectors" field contains collected test items
                # Each collector has a "result" array with test node IDs
                for collector in report.get("collectors", []):
                    for item in collector.get("result", []):
                        # The "nodeid" field is the test ID
                        if "nodeid" in item:
                            all_test_ids.add(item["nodeid"])

                print(f" Collected {len(all_test_ids)} test IDs so far")

            except (json.JSONDecodeError, FileNotFoundError) as e:
                # Best-effort: a bad or missing report for one config should not
                # abort collection for the remaining configs.
                print(f" Warning: Failed to parse JSON report: {e}")
                continue

        finally:
            # Clean up temp file (runs even on the `continue` / raise paths above).
            if os.path.exists(json_report_file):
                os.unlink(json_report_file)

    print(f"\nTotal unique test IDs collected: {len(all_test_ids)}")
    return all_test_ids, configs
|
||||||
|
|
||||||
|
|
||||||
|
def get_base_test_id(test_id: str) -> str:
    """Extract base test ID without parameterization.

    Example:
        'tests/integration/inference/test_foo.py::test_bar[param1-param2]'
        -> 'tests/integration/inference/test_foo.py::test_bar'
    """
    # str.partition returns the whole string as the first element when the
    # separator is absent, so the original `"[" in test_id` guard was redundant.
    return test_id.partition("[")[0]
|
||||||
|
|
||||||
|
|
||||||
|
def find_all_recordings():
    """Return every recording JSON file under tests/integration."""
    integration_dir = REPO_ROOT / "tests" / "integration"
    recording_files = integration_dir.rglob("recordings/*.json")
    return list(recording_files)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_recordings(ci_test_ids, dry_run=True):
    """Analyze recordings and identify unused ones.

    Args:
        ci_test_ids: set of fully parameterized pytest node IDs that run in CI.
        dry_run: when True (default), only report; when False, delete unused files.

    Returns:
        Number of unused recordings found.
    """

    # Use full test IDs with parameterization for exact matching
    all_recordings = find_all_recordings()
    print(f"\nTotal recording files: {len(all_recordings)}")

    # Categorize recordings
    used_recordings = []
    unused_recordings = []  # (path, test_id) pairs
    shared_recordings = []  # model-list endpoints without test_id
    parse_errors = []  # (path, error message) pairs

    for json_file in all_recordings:
        try:
            with open(json_file) as f:
                data = json.load(f)

            test_id = data.get("test_id", "")

            if not test_id:
                # Shared/infrastructure recordings (model lists, etc)
                # are never deleted — they are not tied to a single test.
                shared_recordings.append(json_file)
                continue

            # Match exact test_id (with full parameterization)
            if test_id in ci_test_ids:
                used_recordings.append(json_file)
            else:
                unused_recordings.append((json_file, test_id))

        # Broad catch is deliberate: one corrupt file must not abort the scan.
        except Exception as e:
            parse_errors.append((json_file, str(e)))

    # Print summary
    print("\nRecording Analysis:")
    print(f" Used in CI: {len(used_recordings)}")
    print(f" Shared (no ID): {len(shared_recordings)}")
    print(f" UNUSED: {len(unused_recordings)}")
    print(f" Parse errors: {len(parse_errors)}")

    if unused_recordings:
        print("\nUnused recordings by test:")

        # Group by base test ID (parameterization stripped) for readable output.
        by_test = defaultdict(list)
        for file, test_id in unused_recordings:
            base = get_base_test_id(test_id)
            by_test[base].append(file)

        for base_test, files in sorted(by_test.items()):
            print(f"\n {base_test}")
            print(f" ({len(files)} recording(s))")
            # Show at most 3 example paths per test to keep output compact.
            for f in files[:3]:
                print(f" - {f.relative_to(REPO_ROOT / 'tests/integration')}")
            if len(files) > 3:
                print(f" ... and {len(files) - 3} more")

    if parse_errors:
        print("\nParse errors:")
        for file, error in parse_errors[:5]:
            print(f" {file.relative_to(REPO_ROOT)}: {error}")
        if len(parse_errors) > 5:
            print(f" ... and {len(parse_errors) - 5} more")

    # Perform cleanup
    if not dry_run:
        print(f"\nDeleting {len(unused_recordings)} unused recordings...")
        for file, _ in unused_recordings:
            file.unlink()
            print(f" Deleted: {file.relative_to(REPO_ROOT / 'tests/integration')}")
        print("✅ Cleanup complete")
    else:
        print("\n(Dry run - no files deleted)")
        print("\nTo delete these files, run with --delete")

    return len(unused_recordings)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: collect CI test IDs, then report (or delete) unused recordings."""
    parser = argparse.ArgumentParser(
        description="Clean up unused test recordings based on CI test collection",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Reuse the module docstring (usage examples) as the --help epilog.
        epilog=__doc__,
    )
    parser.add_argument("--delete", action="store_true", help="Actually delete unused recordings (default is dry-run)")
    parser.add_argument("--manifest", help="Save collected test IDs to file (optional)")

    args = parser.parse_args()

    print("=" * 60)
    print("Recording Cleanup Utility")
    print("=" * 60)

    ci_configs = iter_all_ci_configs()

    print(f"\nDetected CI configurations: {len(ci_configs)}")
    for config in ci_configs:
        print(f" - suite={config['suite']}, setup={config['setup']}")

    # Collect test IDs from CI configurations
    ci_test_ids, _ = collect_ci_tests()

    if args.manifest:
        # One test ID per line, sorted for stable diffs between runs.
        with open(args.manifest, "w") as f:
            for test_id in sorted(ci_test_ids):
                f.write(f"{test_id}\n")
        print(f"\nSaved test IDs to: {args.manifest}")

    # Analyze and cleanup (dry-run unless --delete was passed)
    unused_count = analyze_recordings(ci_test_ids, dry_run=not args.delete)

    print("\n" + "=" * 60)
    if unused_count > 0 and not args.delete:
        print("Run with --delete to remove unused recordings")


if __name__ == "__main__":
    main()
|
||||||
61
scripts/generate_ci_matrix.py
Executable file
61
scripts/generate_ci_matrix.py
Executable file
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Generate CI test matrix from ci_matrix.json with schedule/input overrides.
|
||||||
|
|
||||||
|
This script is used by .github/workflows/integration-tests.yml to generate
|
||||||
|
the test matrix dynamically based on the CI_MATRIX definition.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Single source of truth for the CI test matrix (shared with cleanup_recordings.py).
CI_MATRIX_FILE = Path(__file__).parent.parent / "tests/integration/ci_matrix.json"

# NOTE: runs at import time; a missing/invalid ci_matrix.json fails fast here.
with open(CI_MATRIX_FILE) as f:
    matrix_config = json.load(f)

# Matrix entries are {"suite": ..., "setup": ...} dicts.
DEFAULT_MATRIX = matrix_config["default"]
# Optional cron-string -> matrix overrides for scheduled runs; empty if absent.
SCHEDULE_MATRICES: dict[str, list[dict[str, str]]] = matrix_config.get("schedules", {})
|
||||||
|
|
||||||
|
|
||||||
|
def generate_matrix(schedule="", test_setup=""):
    """
    Generate test matrix based on schedule or manual input.

    Args:
        schedule: GitHub cron schedule string (e.g., "1 0 * * 0" for weekly)
        test_setup: Manual test setup input (e.g., "ollama-vision")

    Returns:
        Matrix configuration as JSON string
    """
    # Precedence: schedule-specific override first, then the manual
    # "ollama-vision" input, then the JSON-defined default matrix.
    if schedule and schedule in SCHEDULE_MATRICES:
        selected = SCHEDULE_MATRICES[schedule]
    elif test_setup == "ollama-vision":
        selected = [{"suite": "vision", "setup": "ollama-vision"}]
    else:
        selected = DEFAULT_MATRIX

    # GitHub Actions consumes matrices in {"include": [...]} form.
    return json.dumps({"include": selected})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate CI test matrix")
    parser.add_argument("--schedule", default="", help="GitHub schedule cron string")
    parser.add_argument("--test-setup", default="", help="Manual test setup input")

    args = parser.parse_args()

    # Print to stdout so the workflow step can capture it into $GITHUB_OUTPUT.
    print(generate_matrix(args.schedule, args.test_setup))
|
||||||
6
tests/integration/ci_matrix.json
Normal file
6
tests/integration/ci_matrix.json
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
{
|
||||||
|
"default": [
|
||||||
|
{"suite": "base", "setup": "ollama"},
|
||||||
|
{"suite": "vision", "setup": "ollama-vision"}
|
||||||
|
]
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue