fix(3797): sanitize metadata for attributes to avoid silent failure

This commit is contained in:
r-bit-rry 2025-11-17 11:03:20 +02:00
parent 97f535c4f1
commit f4a54b9db4
3 changed files with 55 additions and 2 deletions

View file

@ -23,6 +23,9 @@ from llama_stack.providers.utils.memory.vector_store import (
content_from_data_and_mime_type,
make_overlapped_chunks,
)
from llama_stack.providers.utils.vector_io.vector_utils import (
sanitize_metadata_for_attributes,
)
from llama_stack_api import (
Chunk,
Files,
@ -635,7 +638,7 @@ class OpenAIVectorStoreMixin(ABC):
file_id=chunk.metadata.get("document_id", ""),
filename=chunk.metadata.get("filename", ""),
score=score,
attributes=chunk.metadata,
attributes=sanitize_metadata_for_attributes(chunk.metadata),
content=content,
)
data.append(response_data_item)

View file

@ -7,6 +7,7 @@
import hashlib
import re
import uuid
from typing import Any
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
@ -37,6 +38,28 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
return s
def sanitize_metadata_for_attributes(metadata: dict[str, Any]) -> dict[str, str | float | bool]:
"""
Filter metadata to primitives for VectorStoreSearchResponse.attributes compatibility.
Converts dict[str, Any] to dict[str, str | float | bool]:
- Preserves: str, bool
- Converts: int/float -> float, list -> comma-separated string
- Filters: dict, None, other types
"""
sanitized: dict[str, str | float | bool] = {}
for key, value in metadata.items():
if isinstance(value, bool):
sanitized[key] = value
elif isinstance(value, (int, float)):
sanitized[key] = float(value)
elif isinstance(value, str):
sanitized[key] = value
elif isinstance(value, list):
sanitized[key] = ", ".join(str(item) for item in value)
return sanitized
class WeightedInMemoryAggregator:
@staticmethod
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: