mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
fix(3797): sanitize metadata for attributes to avoid silent failure
This commit is contained in:
parent
97f535c4f1
commit
f4a54b9db4
3 changed files with 55 additions and 2 deletions
|
|
@ -23,6 +23,9 @@ from llama_stack.providers.utils.memory.vector_store import (
|
|||
content_from_data_and_mime_type,
|
||||
make_overlapped_chunks,
|
||||
)
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import (
|
||||
sanitize_metadata_for_attributes,
|
||||
)
|
||||
from llama_stack_api import (
|
||||
Chunk,
|
||||
Files,
|
||||
|
|
@ -635,7 +638,7 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
file_id=chunk.metadata.get("document_id", ""),
|
||||
filename=chunk.metadata.get("filename", ""),
|
||||
score=score,
|
||||
attributes=chunk.metadata,
|
||||
attributes=sanitize_metadata_for_attributes(chunk.metadata),
|
||||
content=content,
|
||||
)
|
||||
data.append(response_data_item)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
import hashlib
|
||||
import re
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
|
||||
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
|
||||
|
|
@ -37,6 +38,28 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
|
|||
return s
|
||||
|
||||
|
||||
def sanitize_metadata_for_attributes(metadata: dict[str, Any]) -> dict[str, str | float | bool]:
|
||||
"""
|
||||
Filter metadata to primitives for VectorStoreSearchResponse.attributes compatibility.
|
||||
|
||||
Converts dict[str, Any] to dict[str, str | float | bool]:
|
||||
- Preserves: str, bool
|
||||
- Converts: int/float -> float, list -> comma-separated string
|
||||
- Filters: dict, None, other types
|
||||
"""
|
||||
sanitized: dict[str, str | float | bool] = {}
|
||||
for key, value in metadata.items():
|
||||
if isinstance(value, bool):
|
||||
sanitized[key] = value
|
||||
elif isinstance(value, (int, float)):
|
||||
sanitized[key] = float(value)
|
||||
elif isinstance(value, str):
|
||||
sanitized[key] = value
|
||||
elif isinstance(value, list):
|
||||
sanitized[key] = ", ".join(str(item) for item in value)
|
||||
return sanitized
|
||||
|
||||
|
||||
class WeightedInMemoryAggregator:
|
||||
@staticmethod
|
||||
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
||||
from llama_stack.providers.utils.vector_io.vector_utils import (
|
||||
generate_chunk_id,
|
||||
sanitize_metadata_for_attributes,
|
||||
)
|
||||
from llama_stack_api import Chunk, ChunkMetadata
|
||||
|
||||
# This test is a unit test for the chunk_utils.py helpers. This should only contain
|
||||
|
|
@ -78,3 +81,27 @@ def test_chunk_serialization():
|
|||
serialized_chunk = chunk.model_dump()
|
||||
assert serialized_chunk["chunk_id"] == "test-chunk-id"
|
||||
assert "chunk_id" in serialized_chunk
|
||||
|
||||
|
||||
def test_sanitize_metadata_for_attributes():
    """Check that metadata is reduced to values accepted by VectorStoreSearchResponse.attributes."""
    raw_metadata = {
        "tags": ["transformers", "h100-compatible", "region:us"],
        "model_name": "granite-3.3-8b",
        "score": 0.95,
        "active": True,
        "count": 42,
        "nested": {"key": "value"},  # expected to be dropped
    }

    sanitized = sanitize_metadata_for_attributes(raw_metadata)

    # Nested mappings must not survive sanitization.
    assert "nested" not in sanitized

    # The boolean flag must stay a real bool rather than becoming a number.
    assert sanitized["active"] is True

    # Remaining keys: the list is joined into one string, the str and float
    # pass through unchanged, and the int compares equal to its float form.
    expected_values = {
        "tags": "transformers, h100-compatible, region:us",
        "model_name": "granite-3.3-8b",
        "score": 0.95,
        "count": 42.0,
    }
    for field, wanted in expected_values.items():
        assert sanitized[field] == wanted
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue