feat: migrate to FIPS-validated cryptographic algorithms

Signed-off-by: Doug Edgar <dedgar@redhat.com>
This commit is contained in:
Doug Edgar 2025-09-11 15:11:00 -07:00
parent c7ef1f13df
commit f7e4395380
4 changed files with 16 additions and 21 deletions

View file

@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
# NOTE: MD5 is used here only for download integrity verification,
# not for security purposes
# TODO: switch to SHA256
md5_hash = hashlib.md5(usedforsecurity=False)
def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
md5_hash.update(chunk)
return md5_hash.hexdigest()
sha256_hash.update(chunk)
return sha256_hash.hexdigest()
def load_checksums(checklist_path: Path) -> dict[str, str]:
@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
with open(checklist_path) as f:
for line in f:
if line.strip():
md5sum, filepath = line.strip().split(" ", 1)
sha256sum, filepath = line.strip().split(" ", 1)
# Remove leading './' if present
filepath = filepath.lstrip("./")
checksums[filepath] = md5sum
checksums[filepath] = sha256sum
return checksums
@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
matches = False
if exists:
actual_hash = calculate_md5(full_path)
actual_hash = calculate_sha256(full_path)
matches = actual_hash == expected_hash
results.append(

View file

@ -12,14 +12,12 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
"""
Generate a unique chunk ID using a hash of the document ID and chunk text.
Note: MD5 is used only to calculate an identifier, not for security purposes.
Adding usedforsecurity=False for compatibility with FIPS environments.
Then use the first 32 characters of the hash to create a UUID.
"""
hash_input = f"{document_id}:{chunk_text}".encode()
if chunk_window:
hash_input += f":{chunk_window}".encode()
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
return str(uuid.UUID(hashlib.sha256(hash_input).hexdigest()[:32]))
def proper_case(s: str) -> str:

View file

@ -211,7 +211,7 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
return sorted(set(idents))
identifiers = _extract_model_identifiers()
return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
return hashlib.sha256(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None: