Remove error handling for Chunk and add error handling for maybe_await

This commit is contained in:
kimbwook 2025-10-15 11:47:56 +09:00
parent 53f09a7a65
commit 1b82c3c97e
No known key found for this signature in database
GPG key ID: 13B032C99CBD373A

View file

@ -116,14 +116,29 @@ class ChromaIndex(EmbeddingIndex):
k: int, k: int,
score_threshold: float, score_threshold: float,
) -> QueryChunksResponse: ) -> QueryChunksResponse:
results = await maybe_await( """
self.collection.query( Perform keyword search using Chroma's built-in where_document feature.
query_texts=[query_string],
where_document={"$contains": query_string}, Args:
n_results=k, query_string: The text query for keyword search
include=["documents", "distances"], k: Number of results to return
score_threshold: Minimum similarity score threshold
Returns:
QueryChunksResponse with combined results
"""
try:
results = await maybe_await(
self.collection.query(
query_texts=[query_string],
where_document={"$contains": query_string},
n_results=k,
include=["documents", "distances"],
)
) )
) except Exception as e:
log.error(f"Chroma client keyword search failed: {e}")
raise
distances = results["distances"][0] if results["distances"] else [] distances = results["distances"][0] if results["distances"] else []
documents = results["documents"][0] if results["documents"] else [] documents = results["documents"][0] if results["documents"] else []
@ -132,12 +147,8 @@ class ChromaIndex(EmbeddingIndex):
scores = [] scores = []
for dist, doc in zip(distances, documents, strict=False): for dist, doc in zip(distances, documents, strict=False):
try: doc_data = json.loads(doc)
doc_data = json.loads(doc) chunk = Chunk(**doc_data)
chunk = Chunk(**doc_data)
except Exception:
log.exception(f"Failed to load chunk: {doc}")
continue
score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0