chore: Enabling Integration tests for Weaviate (#2882)

# What does this PR do?

This PR (1) enables the files API for Weaviate and (2) enables
integration tests for Weaviate, which adds a docker container to the
github action.

This PR also handles a couple of edge cases for in creating the
collection and ensuring the tests all pass.

## Test Plan
CI enabled

---------

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Arceo 2025-07-31 20:29:50 -04:00 committed by GitHub
parent 369286f95b
commit 33cca26154
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 2197 additions and 2033 deletions

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import hashlib
import re
import uuid
@ -19,3 +20,20 @@ def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | Non
if chunk_window:
hash_input += f":{chunk_window}".encode()
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
def proper_case(s: str) -> str:
"""Convert a string to proper case (first letter uppercase, rest lowercase)."""
return s[0].upper() + s[1:].lower() if s else s
def sanitize_collection_name(name: str, weaviate_format=False) -> str:
"""
Sanitize collection name to ensure it only contains numbers, letters, and underscores.
Any other characters are replaced with underscores.
"""
if not weaviate_format:
s = re.sub(r"[^a-zA-Z0-9_]", "_", name)
else:
s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name))
return s