From 42c23b45f62ffbf47bf61d1d102bbc12da239a9b Mon Sep 17 00:00:00 2001 From: Doug Edgar Date: Wed, 17 Sep 2025 15:10:10 -0700 Subject: [PATCH] feat: update qdrant hash function from SHA-1 to SHA-256 (#3477) # What does this PR do? Updates the qdrant provider's convert_id function to use a FIPS-validated cryptographic hashing function, so that llama-stack is considered to be `Designed for FIPS`. The standard library `uuid.uuid5()` function uses SHA-1 under the hood, which is not FIPS-validated. This commit uses an approach similar to the one merged in #3423. Closes #3476. ## Test Plan Unit tests from scripts/unit-tests.sh were run to verify that the tests pass. A small test script can display the data flow: ```python import hashlib import uuid # Input _id = "chunk_abc123" print(_id) # Step 1: Format and encode hash_input = f"qdrant_id:{_id}".encode() print(hash_input) # Result: b'qdrant_id:chunk_abc123' # Step 2: SHA-256 hash sha256_hash = hashlib.sha256(hash_input).hexdigest() print(sha256_hash) # Result: "184893a6eafeaac487cb9166351e8625b994d50f3456d8bc6cea32a014a27151" # Step 3: Create UUID from first 32 chars uuid_string = str(uuid.UUID(sha256_hash[:32])) print(uuid_string) # sha256_hash[:32] = "184893a6eafeaac487cb9166351e8625" # Final result: "184893a6-eafe-aac4-87cb-9166351e8625" ``` Signed-off-by: Doug Edgar --- llama_stack/providers/remote/vector_io/qdrant/qdrant.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 0a0faa23a..ec3869495 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import asyncio +import hashlib import uuid from typing import Any @@ -49,10 +50,13 @@ def convert_id(_id: str) -> str: Converts any string into a UUID string based on a seed. 
Qdrant accepts UUID strings and unsigned integers as point ID. - We use a seed to convert each string into a UUID string deterministically. + We use a SHA-256 hash to convert each string into a UUID string deterministically. This allows us to overwrite the same point with the original ID. """ - return str(uuid.uuid5(uuid.NAMESPACE_DNS, _id)) + hash_input = f"qdrant_id:{_id}".encode() + sha256_hash = hashlib.sha256(hash_input).hexdigest() + # Use the first 32 characters to create a valid UUID + return str(uuid.UUID(sha256_hash[:32])) class QdrantIndex(EmbeddingIndex):