import hashlib
import uuid


def generate_chunk_id(document_id: str, chunk_text: str) -> str:
    """
    Generate a deterministic chunk ID from the document ID and chunk text.

    The two inputs are joined as ``"{document_id}:{chunk_text}"``, encoded
    with the default ``str.encode()`` codec (UTF-8), hashed with MD5, and the
    128-bit digest is rendered in canonical UUID string form.

    Note: MD5 is used only to calculate an identifier, not for security
    purposes. ``usedforsecurity=False`` is passed for compatibility with FIPS
    environments.

    :param document_id: identifier of the document the chunk belongs to
    :param chunk_text: text content of the chunk
    :returns: a 36-character UUID-formatted string; identical inputs always
        produce the same ID
    """
    # NOTE: the ':' separator is not escaped, so pairs such as ("a:b", "c")
    # and ("a", "b:c") hash to the same ID. It is kept as-is so that the
    # generated IDs stay stable.
    hash_input = f"{document_id}:{chunk_text}".encode()

    try:
        md5_hash = hashlib.md5(hash_input, usedforsecurity=False).hexdigest()
    except TypeError:
        # Fallback for environments that don't support the usedforsecurity
        # keyword (e.g., Python < 3.9 or non-OpenSSL backends).
        md5_hash = hashlib.md5(hash_input).hexdigest()

    return str(uuid.UUID(md5_hash))