mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat: update qdrant hash function from SHA-1 to SHA-256 (#3477)
Some checks failed
Installer CI / smoke-test-on-dev (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Installer CI / lint (push) Failing after 2s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Test Llama Stack Build / generate-matrix (push) Successful in 3s
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 1s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 3s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Test Llama Stack Build / build-single-provider (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 8s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 2s
UI Tests / ui-tests (22) (push) Successful in 29s
Pre-commit / pre-commit (push) Successful in 1m10s
Some checks failed
Installer CI / smoke-test-on-dev (push) Failing after 3s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Installer CI / lint (push) Failing after 2s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Test Llama Stack Build / generate-matrix (push) Successful in 3s
Python Package Build Test / build (3.13) (push) Failing after 1s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 1s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 3s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Vector IO Integration Tests / test-matrix (push) Failing after 4s
Test Llama Stack Build / build-single-provider (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 8s
Test External API and Providers / test-external (venv) (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 3s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 2s
UI Tests / ui-tests (22) (push) Successful in 29s
Pre-commit / pre-commit (push) Successful in 1m10s
# What does this PR do? Updates the qdrant provider's convert_id function to use a FIPS-validated cryptographic hashing function, so that llama-stack is considered to be `Designed for FIPS`. The standard library `uuid.uuid5()` function uses SHA-1 under the hood, which is not FIPS-validated. This commit uses an approach similar to the one merged in #3423. Closes #3476. ## Test Plan Unit tests from scripts/unit-tests.sh were ran to verify that the tests pass. A small test script can display the data flow: ```python import hashlib import uuid # Input _id = "chunk_abc123" print(_id) # Step 1: Format and encode hash_input = f"qdrant_id:{_id}".encode() print(hash_input) # Result: b'qdrant_id:chunk_abc123' # Step 2: SHA-256 hash sha256_hash = hashlib.sha256(hash_input).hexdigest() print(sha256_hash) # Result: "184893a6eafeaac487cb9166351e8625b994d50f3456d8bc6cea32a014a27151" # Step 3: Create UUID from first 32 chars uuid_string = str(uuid.UUID(sha256_hash[:32])) print(uuid_string) # sha256_hash[:32] = "184893a6eafeaac487cb9166351e8625" # Final result: "184893a6-eafe-aac4-87cb-9166351e8625" ``` Signed-off-by: Doug Edgar <dedgar@redhat.com>
This commit is contained in:
parent
ac1414b571
commit
42c23b45f6
1 changed files with 6 additions and 2 deletions
|
@ -5,6 +5,7 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import hashlib
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
@ -49,10 +50,13 @@ def convert_id(_id: str) -> str:
|
||||||
Converts any string into a UUID string based on a seed.
|
Converts any string into a UUID string based on a seed.
|
||||||
|
|
||||||
Qdrant accepts UUID strings and unsigned integers as point ID.
|
Qdrant accepts UUID strings and unsigned integers as point ID.
|
||||||
We use a seed to convert each string into a UUID string deterministically.
|
We use a SHA-256 hash to convert each string into a UUID string deterministically.
|
||||||
This allows us to overwrite the same point with the original ID.
|
This allows us to overwrite the same point with the original ID.
|
||||||
"""
|
"""
|
||||||
return str(uuid.uuid5(uuid.NAMESPACE_DNS, _id))
|
hash_input = f"qdrant_id:{_id}".encode()
|
||||||
|
sha256_hash = hashlib.sha256(hash_input).hexdigest()
|
||||||
|
# Use the first 32 characters to create a valid UUID
|
||||||
|
return str(uuid.UUID(sha256_hash[:32]))
|
||||||
|
|
||||||
|
|
||||||
class QdrantIndex(EmbeddingIndex):
|
class QdrantIndex(EmbeddingIndex):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue