litellm-mirror/tests/code_coverage_tests/check_licenses.py
Krish Dholakia 49610485a2
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 15s
feat: add oss license check for related packages (#8623)
ensure all packages used are permissible
2025-02-18 10:40:00 -08:00

295 lines
11 KiB
Python

#!/usr/bin/env python3
import sys
import pkg_resources
import requests
from pathlib import Path
import json
from typing import Dict, List, Optional, Set, Tuple
import configparser
import re
from dataclasses import dataclass
@dataclass
class PackageLicense:
    """Outcome of a license check for a single package.

    One instance is recorded per checked package so that a summary of
    unknown or unauthorized licenses can be printed after the run.
    """

    # Package name as it was passed to the checker.
    name: str
    # Version that was checked, or None when no version was given.
    version: Optional[str]
    # License text that was found (or the manual-verification comment from
    # the config); None when no license could be determined.
    license_type: Optional[str]
    # True when the package is considered acceptable.
    is_authorized: bool
    # Human-readable explanation of how ``is_authorized`` was decided.
    reason: str
class LicenseChecker:
    """Check dependency licenses against allow/deny lists from an INI config.

    The config is read for a ``[Licenses]`` section with the options
    ``authorized_licenses`` and ``unauthorized_licenses`` (one license per
    line) and for an optional ``[Authorized Packages]`` section whose entries
    look like ``package-name: >=1.0.0 # comment``. License names are fetched
    from PyPI and remembered in ``license_cache.json`` in the current working
    directory. Every checked package is recorded in ``package_results``.
    """

    def __init__(self, config_file: Path = Path("liccheck.ini")):
        """Load the config file and any previously cached license lookups.

        Args:
            config_file: Path of the INI config. If it does not exist an
                error is printed and the process exits with status 1.
        """
        if not config_file.exists():
            print(f"Error: Config file {config_file} not found")
            sys.exit(1)

        self.config = configparser.ConfigParser(allow_no_value=True)
        self.config.read(config_file)

        # License allow/deny lists (lower-cased entries).
        self.authorized_licenses = self._parse_license_list(
            "Licenses", "authorized_licenses"
        )
        self.unauthorized_licenses = self._parse_license_list(
            "Licenses", "unauthorized_licenses"
        )

        # Explicitly authorized packages, keyed by lower-cased name.
        self.authorized_packages = self._parse_authorized_packages()

        # On-disk cache of "name[:version]" -> license string.
        self.cache_file = Path("license_cache.json")
        self.license_cache: Dict[str, str] = {}
        if self.cache_file.exists():
            try:
                with open(self.cache_file, encoding="utf-8") as f:
                    cached = json.load(f)
            except (OSError, ValueError) as e:
                # A corrupt cache only costs extra lookups; do not abort.
                print(
                    f"Warning: Ignoring unreadable license cache {self.cache_file}: {e}"
                )
            else:
                if isinstance(cached, dict):
                    self.license_cache = cached

        # One PackageLicense per checked package, in check order.
        self.package_results: List[PackageLicense] = []

    def _parse_license_list(self, section: str, option: str) -> Set[str]:
        """Return the lower-cased, non-comment lines of a multi-line option.

        Returns an empty set when the option is missing or has no value.
        """
        if not self.config.has_option(section, option):
            return set()
        # With allow_no_value=True a bare option name yields None, not "".
        raw_value = self.config.get(section, option) or ""
        entries = (line.strip().lower() for line in raw_value.split("\n"))
        return {entry for entry in entries if entry and not entry.startswith("#")}

    def _parse_authorized_packages(self) -> Dict[str, Dict[str, str]]:
        """Parse ``[Authorized Packages]`` into name -> spec/comment dicts.

        Each value is split at the first ``#``: the left part becomes
        ``version_spec`` and the right part becomes ``comment`` (empty when
        there is no ``#``).
        """
        authorized: Dict[str, Dict[str, str]] = {}
        if not self.config.has_section("Authorized Packages"):
            return authorized

        for package, spec in self.config.items("Authorized Packages"):
            if package.startswith("#"):
                continue
            # Tolerate entries that carry no value at all.
            version_spec, _, comment = (spec or "").partition("#")
            authorized[package.strip().lower()] = {
                "version_spec": version_spec.strip(),
                "comment": comment.strip(),
            }
        return authorized

    @staticmethod
    def _pypi_json_url(package_name: str, version: str) -> str:
        """Build the PyPI JSON API URL for a release, or the project if unversioned."""
        if version:
            return f"https://pypi.org/pypi/{package_name}/{version}/json"
        # Without a version, use the project endpoint instead of emitting an
        # empty path segment ("/pypi/<name>//json").
        return f"https://pypi.org/pypi/{package_name}/json"

    @staticmethod
    def _extract_license(info: Dict) -> Optional[str]:
        """Pick a license string out of the ``info`` object of a PyPI response.

        Prefers ``license``, then ``license_expression``, then the last
        segment of any ``License ::`` trove classifiers joined with ", ".
        Returns None when none of them carries a value.
        """
        for field_name in ("license", "license_expression"):
            value = info.get(field_name)
            if value:
                return value
        classifier_names = [
            classifier.rsplit("::", 1)[-1].strip()
            for classifier in info.get("classifiers") or []
            if classifier.startswith("License ::")
        ]
        return ", ".join(classifier_names) or None

    def get_package_license_from_pypi(
        self, package_name: str, version: str
    ) -> Optional[str]:
        """Fetch license information for a package from PyPI.

        Args:
            package_name: Project name to look up.
            version: Release to look up; an empty string queries the project
                endpoint without a version.

        Returns:
            The license string, or None when the lookup fails or the
            metadata carries no license information. Failures are reported
            with a printed warning rather than raised.
        """
        try:
            response = requests.get(
                self._pypi_json_url(package_name, version), timeout=10
            )
            response.raise_for_status()
            info = response.json().get("info") or {}
            return self._extract_license(info)
        except Exception as e:  # best-effort lookup: any failure means "unknown"
            print(
                f"Warning: Failed to fetch license for {package_name} {version}: {str(e)}"
            )
            return None

    @staticmethod
    def _normalize_license(text: str) -> str:
        """Lower-case *text* and turn ``-`` / ``_`` into spaces for matching."""
        return text.lower().replace("-", " ").replace("_", " ")

    def is_license_acceptable(self, license_str: str) -> Tuple[bool, str]:
        """Decide whether a license string is acceptable.

        Matching is by substring on normalized text (see
        ``_normalize_license``). Configured names are normalized the same
        way as the input, so an entry such as ``apache-2.0`` matches the
        license string ``Apache-2.0``.

        Returns:
            ``(is_acceptable, reason)``.
        """
        if not license_str:
            return False, "Unknown license"

        normalized_license = self._normalize_license(license_str)

        # Special case for BSD licenses. NOTE: this shortcut runs before the
        # unauthorized check, as in the original ordering. Hyphens are
        # already spaces here, so "3 clause" also covers "3-clause".
        if "bsd" in normalized_license and any(
            variation in normalized_license
            for variation in ("3 clause", "new", "simplified")
        ):
            return True, "Matches authorized license: BSD 3-Clause"

        # Unauthorized entries win over authorized ones.
        for unauth in self.unauthorized_licenses:
            if self._normalize_license(unauth) in normalized_license:
                return False, f"Matches unauthorized license: {unauth}"

        for auth in self.authorized_licenses:
            if self._normalize_license(auth) in normalized_license:
                return True, f"Matches authorized license: {auth}"

        return False, "License not in authorized list"

    def _lookup_license(
        self, package_name: str, version: Optional[str]
    ) -> Optional[str]:
        """Return the license from the cache, falling back to a single PyPI query."""
        cache_key = f"{package_name}:{version}" if version else package_name
        if cache_key in self.license_cache:
            return self.license_cache[cache_key]
        license_type = self.get_package_license_from_pypi(package_name, version or "")
        if license_type:
            self.license_cache[cache_key] = license_type
        return license_type

    def check_package(self, package_name: str, version: Optional[str] = None) -> bool:
        """Check one package and record the outcome in ``package_results``.

        A package listed under ``[Authorized Packages]`` passes when it has
        a comment (treated as manual verification) or when its license can
        be determined at all. Any other package passes only if
        ``is_license_acceptable`` accepts its license.

        Returns:
            True when the package is considered compliant.
        """
        pkg_info = self.authorized_packages.get(package_name.lower())

        # A comment on an authorized package counts as manual verification.
        if pkg_info is not None and pkg_info.get("comment"):
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=pkg_info["comment"],
                    is_authorized=True,
                    reason="Manually verified in config",
                )
            )
            print(f"{package_name}: Manually verified - {pkg_info['comment']}")
            return True

        # One lookup serves both the authorized and the regular path.
        license_type = self._lookup_license(package_name, version)

        if not license_type:
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=None,
                    is_authorized=False,
                    reason="Could not determine license",
                )
            )
            print(f"⚠️ Warning: Could not determine license for {package_name}")
            return False

        if pkg_info is not None:
            # Listed packages are authorized regardless of the license match.
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=license_type,
                    is_authorized=True,
                    reason=f"Listed in authorized packages - {license_type}",
                )
            )
            print(f"{package_name}: {license_type} (Listed in authorized packages)")
            return True

        is_acceptable, reason = self.is_license_acceptable(license_type)
        self.package_results.append(
            PackageLicense(
                name=package_name,
                version=version,
                license_type=license_type,
                is_authorized=is_acceptable,
                reason=reason,
            )
        )
        if is_acceptable:
            print(f"{package_name}: {license_type}")
        else:
            print(f"{package_name}: {license_type} - {reason}")
        return is_acceptable

    def check_requirements(self, requirements_file: Path) -> bool:
        """Check every requirement in a requirements file.

        Blank lines and comment lines (including indented ones) are
        ignored. Lines starting with ``-`` (pip options such as ``-r`` or
        ``--index-url``) are skipped with a warning. The license cache is
        written back to ``cache_file`` after all packages were checked.

        Returns:
            True when every package is compliant; False when any package
            fails or the file cannot be read or parsed.
        """
        print(f"\nChecking licenses for packages in {requirements_file}...")

        try:
            requirements = []
            with open(requirements_file, encoding="utf-8") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith("#"):
                        continue
                    if line.startswith("-"):
                        # pip options are not requirement specifiers.
                        print(f"Warning: Skipping unsupported line: {line}")
                        continue
                    requirements.append(pkg_resources.Requirement.parse(line))
        except Exception as e:
            print(f"Error parsing {requirements_file}: {str(e)}")
            return False

        all_compliant = True
        for req in requirements:
            # Use the version of the first specifier, whatever its operator.
            version = req.specs[0][1] if req.specs else None
            if not self.check_package(req.name, version):
                all_compliant = False

        # Persist lookups for the next run.
        with open(self.cache_file, "w", encoding="utf-8") as f:
            json.dump(self.license_cache, f, indent=2)

        return all_compliant
def main():
    """Run the license check for ``../../requirements.txt`` and exit.

    The requirements path is relative to the current working directory.
    Exits with status 1 when the requirements file could not be checked at
    all, or when a package with an unknown or unauthorized license is not
    listed in the checker's authorized packages. Exits with status 0
    otherwise.
    """
    req_file = "../../requirements.txt"
    checker = LicenseChecker()

    if not checker.check_requirements(Path(req_file)):
        # A failed check that recorded no package means the requirements
        # file could not be read or parsed. Treat that as a failure instead
        # of falling through to a silent exit status of 0.
        if not checker.package_results:
            print(
                f"\n❌ Error: Could not read or parse {req_file}; no packages were checked."
            )
            sys.exit(1)

        # Split the problematic packages by kind of problem.
        unverified = [p for p in checker.package_results if not p.license_type]
        invalid = [
            p for p in checker.package_results if p.license_type and not p.is_authorized
        ]

        if unverified:
            print("\n❌ Packages with unknown licenses:")
            for pkg in unverified:
                version_str = f" ({pkg.version})" if pkg.version else ""
                print(f"- {pkg.name}{version_str}")

        if invalid:
            print("\n❌ Packages with unauthorized licenses:")
            for pkg in invalid:
                version_str = f" ({pkg.version})" if pkg.version else ""
                print(f"- {pkg.name}{version_str}: {pkg.license_type}")

        # Only error if there are packages that aren't explicitly authorized.
        unhandled_packages = [
            p
            for p in (unverified + invalid)
            if p.name.lower() not in checker.authorized_packages
        ]

        if unhandled_packages:
            print("\n❌ Error: Found packages that need verification:")
            for pkg in unhandled_packages:
                version_str = f" ({pkg.version})" if pkg.version else ""
                license_str = (
                    f" - {pkg.license_type}"
                    if pkg.license_type
                    else " - Unknown license"
                )
                print(f"- {pkg.name}{version_str}{license_str}")
            print(
                "\nAdd these packages to the [Authorized Packages] section in liccheck.ini with a comment about their license verification."
            )
            print("Example:")
            print("package-name: >=1.0.0 # MIT license manually verified")
            sys.exit(1)
    else:
        print("\n✅ All dependencies have acceptable licenses.")

    sys.exit(0)


if __name__ == "__main__":
    main()