fix: ChromaDB provider (#2413)

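Fixes the remote::chromadb provider for vector_io by renaming the
`ChromaIndex.query` method to `query_vector`.
Also fixes the implementation to apply `score_threshold`: results whose
score falls below the threshold are now skipped, and a distance of 0 yields
an infinite score instead of raising a division-by-zero error.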

### Test Plan 
```
# Start Chroma Docker 
docker run --rm \
  --name chromadb \
  -p 8800:8000 \
  -v ~/chroma:/chroma/chroma \
  -e IS_PERSISTENT=TRUE \
  -e ANONYMIZED_TELEMETRY=FALSE \
  chromadb/chroma:latest

# run pytest 
CHROMADB_URL="http://localhost:8800" pytest -sv tests/integration/vector_io/test_vector_io.py --stack-config vector_io=remote::chromadb,inference=fireworks --embedding-model nomic-ai/nomic-embed-text-v1.5
```
This commit is contained in:
Hardik Shah 2025-06-06 11:25:58 -07:00 committed by GitHub
parent 0d0b8d2be1
commit 1f48577a02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -55,7 +55,7 @@ class ChromaIndex(EmbeddingIndex):
)
)
async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
async def query_vector(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
results = await maybe_await(
self.collection.query(
query_embeddings=[embedding.tolist()],
@ -76,8 +76,12 @@ class ChromaIndex(EmbeddingIndex):
log.exception(f"Failed to parse document: {doc}")
continue
score = 1.0 / float(dist) if dist != 0 else float("inf")
if score < score_threshold:
continue
chunks.append(chunk)
scores.append(1.0 / float(dist))
scores.append(score)
return QueryChunksResponse(chunks=chunks, scores=scores)