Enhance docstring explaining why usedforsecurity=False was used

Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
This commit is contained in:
Jorge Garcia Oncins 2025-07-01 19:28:52 +02:00
parent 57dcd183d5
commit 9c0f2208f4

View file

@@ -9,6 +9,18 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
    """
    Generate a chunk ID using a hash of the document ID and chunk text.

    The ID is the MD5 digest of ``f"{document_id}:{chunk_text}"`` rendered as a
    UUID string, so the same inputs always produce the same ID.

    Note: MD5 is used only to calculate an identifier, not for security purposes.
    ``usedforsecurity=False`` is passed for compatibility with FIPS environments.

    :param document_id: Identifier of the document the chunk belongs to.
    :param chunk_text: Text content of the chunk.
    :returns: The MD5 digest formatted as a UUID string.
    """
    hash_input = f"{document_id}:{chunk_text}".encode()
    try:
        md5_hash = hashlib.md5(hash_input, usedforsecurity=False).hexdigest()
    except TypeError:
        # Fallback for environments that don't support usedforsecurity
        # (e.g., Python < 3.9 or non-OpenSSL backends)
        md5_hash = hashlib.md5(hash_input).hexdigest()
    # A 32-hex-digit MD5 digest is exactly the width uuid.UUID accepts as hex.
    return str(uuid.UUID(md5_hash))