Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)
feat: migrate to FIPS-validated cryptographic algorithms
Signed-off-by: Doug Edgar <dedgar@redhat.com>
commit f7e4395380 (parent c7ef1f13df)

4 changed files with 16 additions and 21 deletions
@@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
     parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
 
 
-def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
-    # NOTE: MD5 is used here only for download integrity verification,
-    # not for security purposes
-    # TODO: switch to SHA256
-    md5_hash = hashlib.md5(usedforsecurity=False)
+def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
+    sha256_hash = hashlib.sha256()
     with open(filepath, "rb") as f:
         for chunk in iter(lambda: f.read(chunk_size), b""):
-            md5_hash.update(chunk)
-    return md5_hash.hexdigest()
+            sha256_hash.update(chunk)
+    return sha256_hash.hexdigest()
 
 
 def load_checksums(checklist_path: Path) -> dict[str, str]:
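SHA-256 is a FIPS-approved algorithm, so the usedforsecurity=False escape hatch and the apologetic comments can simply go away. The streaming pattern is kept; as a minimal standalone sketch (the file path and contents here are hypothetical, purely to exercise the function), the chunked digest matches a one-shot hashlib.sha256 over the same bytes:

import hashlib
from pathlib import Path

def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
    # Stream the file in fixed-size chunks so large model weights
    # never have to fit in memory at once.
    sha256_hash = hashlib.sha256()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()

path = Path("example.bin")  # hypothetical test file
path.write_bytes(b"llama" * 10_000)
assert calculate_sha256(path) == hashlib.sha256(path.read_bytes()).hexdigest()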
@@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
     with open(checklist_path) as f:
         for line in f:
             if line.strip():
-                md5sum, filepath = line.strip().split(" ", 1)
+                sha256sum, filepath = line.strip().split(" ", 1)
                 # Remove leading './' if present
                 filepath = filepath.lstrip("./")
-                checksums[filepath] = md5sum
+                checksums[filepath] = sha256sum
     return checksums
 
 
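load_checksums expects sha256sum-style lines, one "<digest> <path>" entry per line. A minimal round-trip sketch (the checklist path and file name are hypothetical; the digest is SHA-256 of b"hello world"):

import hashlib
from pathlib import Path

checklist = Path("checklist.chk")  # hypothetical path
digest = hashlib.sha256(b"hello world").hexdigest()
checklist.write_text(f"{digest} ./params.json\n")

checksums: dict[str, str] = {}
for line in checklist.read_text().splitlines():
    if line.strip():
        sha256sum, filepath = line.strip().split(" ", 1)
        checksums[filepath.lstrip("./")] = sha256sum  # drop leading './'

assert checksums == {"params.json": digest}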
@@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
         matches = False
 
         if exists:
-            actual_hash = calculate_md5(full_path)
+            actual_hash = calculate_sha256(full_path)
             matches = actual_hash == expected_hash
 
         results.append(
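Per entry, verify_files only needs an existence check plus a digest comparison. A minimal sketch of that comparison (path and pinned digest are hypothetical; the one-shot read is for brevity, where the real code streams via calculate_sha256):

import hashlib
from pathlib import Path

expected_hash = "0" * 64               # hypothetical pinned SHA-256 digest
full_path = Path("model/params.json")  # hypothetical downloaded file

exists = full_path.exists()
# Short-circuit: the file is only hashed if it actually exists.
matches = exists and hashlib.sha256(full_path.read_bytes()).hexdigest() == expected_hash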
@@ -12,14 +12,12 @@ import uuid
 def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
     """
     Generate a unique chunk ID using a hash of the document ID and chunk text.
-
-    Note: MD5 is used only to calculate an identifier, not for security purposes.
-    Adding usedforsecurity=False for compatibility with FIPS environments.
+    Then use the first 32 characters of the hash to create a UUID.
     """
     hash_input = f"{document_id}:{chunk_text}".encode()
     if chunk_window:
         hash_input += f":{chunk_window}".encode()
-    return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
+    return str(uuid.UUID(hashlib.sha256(hash_input).hexdigest()[:32]))
 
 
 def proper_case(s: str) -> str:
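An MD5 hexdigest is exactly 32 hex characters, which is why the old chunk-ID helper could feed it straight to uuid.UUID; SHA-256 produces 64, so the new code truncates with hexdigest()[:32]. A minimal sketch of the construction (stdlib only, hypothetical inputs):

import hashlib
import uuid

hash_input = b"doc-1:test:0-1"  # document_id:chunk_text:chunk_window
digest = hashlib.sha256(hash_input).hexdigest()  # 64 hex characters
chunk_id = str(uuid.UUID(digest[:32]))           # uuid.UUID accepts any 32-char hex string
print(chunk_id)  # deterministic: the same input always yields the same ID

Truncating a SHA-256 digest to 128 bits is ample for a collision-resistant identifier, though the result is not a version-compliant UUID; neither was the MD5-based one.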
@@ -211,7 +211,7 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
         return sorted(set(idents))
 
     identifiers = _extract_model_identifiers()
-    return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
+    return hashlib.sha256(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
 
 
 def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
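The model-list digest is just a short cache key, so only the first 8 hex characters are kept; swapping SHA-1 for SHA-256 changes the value but not the shape. A minimal sketch (the model identifiers are hypothetical):

import hashlib

identifiers = sorted({"llama-3.1-8b", "llama-3.1-70b"})  # hypothetical model IDs
digest = hashlib.sha256("|".join(identifiers).encode("utf-8")).hexdigest()[:8]
print(digest)  # 8-hex-character key, stable for a given identifier set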
@@ -26,9 +26,9 @@ def test_generate_chunk_id():
 
     chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
     assert chunk_ids == [
-        "177a1368-f6a8-0c50-6e92-18677f2c3de3",
-        "bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
-        "f68df25d-d9aa-ab4d-5684-64a233add20d",
+        "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
+        "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
+        "d14f75a1-5855-7f72-2c78-d9fc4275a346",
     ]
 
 
@@ -36,14 +36,14 @@ def test_generate_chunk_id_with_window():
     chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
     chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
     chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
-    assert chunk_id1 == "149018fe-d0eb-0f8d-5f7f-726bdd2aeedb"
-    assert chunk_id2 == "4562c1ee-9971-1f3b-51a6-7d05e5211154"
+    assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
+    assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
 
 
 def test_chunk_id():
     # Test with existing chunk ID
     chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
-    assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350"
+    assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
 
     # Test with document ID in metadata
     chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
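Because the hash function changed, every chunk ID pinned in these tests changes with it. A minimal standalone sketch for regenerating expected values (this re-implements the new scheme rather than importing the library; actual test values also depend on how a Chunk stringifies into chunk_text):

import hashlib
import uuid

def expected_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
    # Mirror of the new generate_chunk_id scheme: SHA-256 truncated to 32 hex chars.
    hash_input = f"{document_id}:{chunk_text}".encode()
    if chunk_window:
        hash_input += f":{chunk_window}".encode()
    return str(uuid.UUID(hashlib.sha256(hash_input).hexdigest()[:32]))

print(expected_chunk_id("doc-1", "test", "0-1"))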