mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
changes according to the comments
This commit is contained in:
parent
3c672f4293
commit
1abb78bd94
9 changed files with 203 additions and 57 deletions
|
|
@ -9855,9 +9855,21 @@ components:
|
||||||
title: Object
|
title: Object
|
||||||
default: vector_store.file
|
default: vector_store.file
|
||||||
attributes:
|
attributes:
|
||||||
additionalProperties: true
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
maxLength: 512
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
title: string | number | boolean
|
||||||
|
propertyNames:
|
||||||
|
type: string
|
||||||
|
maxLength: 64
|
||||||
type: object
|
type: object
|
||||||
|
maxProperties: 16
|
||||||
title: Attributes
|
title: Attributes
|
||||||
|
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
|
||||||
|
x-oaiTypeLabel: map
|
||||||
chunking_strategy:
|
chunking_strategy:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
|
|
||||||
14
docs/static/deprecated-llama-stack-spec.yaml
vendored
14
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -6698,9 +6698,21 @@ components:
|
||||||
title: Object
|
title: Object
|
||||||
default: vector_store.file
|
default: vector_store.file
|
||||||
attributes:
|
attributes:
|
||||||
additionalProperties: true
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
maxLength: 512
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
title: string | number | boolean
|
||||||
|
propertyNames:
|
||||||
|
type: string
|
||||||
|
maxLength: 64
|
||||||
type: object
|
type: object
|
||||||
|
maxProperties: 16
|
||||||
title: Attributes
|
title: Attributes
|
||||||
|
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
|
||||||
|
x-oaiTypeLabel: map
|
||||||
chunking_strategy:
|
chunking_strategy:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
|
|
||||||
14
docs/static/experimental-llama-stack-spec.yaml
vendored
14
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -6059,9 +6059,21 @@ components:
|
||||||
title: Object
|
title: Object
|
||||||
default: vector_store.file
|
default: vector_store.file
|
||||||
attributes:
|
attributes:
|
||||||
additionalProperties: true
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
maxLength: 512
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
title: string | number | boolean
|
||||||
|
propertyNames:
|
||||||
|
type: string
|
||||||
|
maxLength: 64
|
||||||
type: object
|
type: object
|
||||||
|
maxProperties: 16
|
||||||
title: Attributes
|
title: Attributes
|
||||||
|
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
|
||||||
|
x-oaiTypeLabel: map
|
||||||
chunking_strategy:
|
chunking_strategy:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
|
|
||||||
14
docs/static/llama-stack-spec.yaml
vendored
14
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -8876,9 +8876,21 @@ components:
|
||||||
title: Object
|
title: Object
|
||||||
default: vector_store.file
|
default: vector_store.file
|
||||||
attributes:
|
attributes:
|
||||||
additionalProperties: true
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
maxLength: 512
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
title: string | number | boolean
|
||||||
|
propertyNames:
|
||||||
|
type: string
|
||||||
|
maxLength: 64
|
||||||
type: object
|
type: object
|
||||||
|
maxProperties: 16
|
||||||
title: Attributes
|
title: Attributes
|
||||||
|
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
|
||||||
|
x-oaiTypeLabel: map
|
||||||
chunking_strategy:
|
chunking_strategy:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
|
|
||||||
14
docs/static/stainless-llama-stack-spec.yaml
vendored
14
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -9855,9 +9855,21 @@ components:
|
||||||
title: Object
|
title: Object
|
||||||
default: vector_store.file
|
default: vector_store.file
|
||||||
attributes:
|
attributes:
|
||||||
additionalProperties: true
|
additionalProperties:
|
||||||
|
anyOf:
|
||||||
|
- type: string
|
||||||
|
maxLength: 512
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
title: string | number | boolean
|
||||||
|
propertyNames:
|
||||||
|
type: string
|
||||||
|
maxLength: 64
|
||||||
type: object
|
type: object
|
||||||
|
maxProperties: 16
|
||||||
title: Attributes
|
title: Attributes
|
||||||
|
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
|
||||||
|
x-oaiTypeLabel: map
|
||||||
chunking_strategy:
|
chunking_strategy:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
|
|
||||||
|
|
@ -23,9 +23,6 @@ from llama_stack.providers.utils.memory.vector_store import (
|
||||||
content_from_data_and_mime_type,
|
content_from_data_and_mime_type,
|
||||||
make_overlapped_chunks,
|
make_overlapped_chunks,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.utils.vector_io.vector_utils import (
|
|
||||||
sanitize_metadata_for_attributes,
|
|
||||||
)
|
|
||||||
from llama_stack_api import (
|
from llama_stack_api import (
|
||||||
Chunk,
|
Chunk,
|
||||||
Files,
|
Files,
|
||||||
|
|
@ -638,7 +635,7 @@ class OpenAIVectorStoreMixin(ABC):
|
||||||
file_id=chunk.metadata.get("document_id", ""),
|
file_id=chunk.metadata.get("document_id", ""),
|
||||||
filename=chunk.metadata.get("filename", ""),
|
filename=chunk.metadata.get("filename", ""),
|
||||||
score=score,
|
score=score,
|
||||||
attributes=sanitize_metadata_for_attributes(chunk.metadata),
|
attributes=chunk.metadata,
|
||||||
content=content,
|
content=content,
|
||||||
)
|
)
|
||||||
data.append(response_data_item)
|
data.append(response_data_item)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
|
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
|
||||||
|
|
@ -38,28 +37,6 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def sanitize_metadata_for_attributes(metadata: dict[str, Any]) -> dict[str, str | float | bool]:
|
|
||||||
"""
|
|
||||||
Filter metadata to primitives for VectorStoreSearchResponse.attributes compatibility.
|
|
||||||
|
|
||||||
Converts dict[str, Any] to dict[str, str | float | bool]:
|
|
||||||
- Preserves: str, bool
|
|
||||||
- Converts: int/float -> float, list -> comma-separated string
|
|
||||||
- Filters: dict, None, other types
|
|
||||||
"""
|
|
||||||
sanitized: dict[str, str | float | bool] = {}
|
|
||||||
for key, value in metadata.items():
|
|
||||||
if isinstance(value, bool):
|
|
||||||
sanitized[key] = value
|
|
||||||
elif isinstance(value, int | float):
|
|
||||||
sanitized[key] = float(value)
|
|
||||||
elif isinstance(value, str):
|
|
||||||
sanitized[key] = value
|
|
||||||
elif isinstance(value, list):
|
|
||||||
sanitized[key] = ", ".join(str(item) for item in value)
|
|
||||||
return sanitized
|
|
||||||
|
|
||||||
|
|
||||||
class WeightedInMemoryAggregator:
|
class WeightedInMemoryAggregator:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
|
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@
|
||||||
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
||||||
|
|
||||||
from fastapi import Body, Query
|
from fastapi import Body, Query
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
|
||||||
from llama_stack_api.common.tracing import telemetry_traceable
|
from llama_stack_api.common.tracing import telemetry_traceable
|
||||||
from llama_stack_api.inference import InterleavedContent
|
from llama_stack_api.inference import InterleavedContent
|
||||||
|
|
@ -372,6 +372,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
|
||||||
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
|
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
|
||||||
|
|
||||||
|
|
||||||
|
# VectorStoreFileAttributes type with OpenAPI constraints
|
||||||
|
VectorStoreFileAttributes = Annotated[
|
||||||
|
dict[str, Annotated[str, Field(max_length=512)] | float | bool],
|
||||||
|
Field(
|
||||||
|
max_length=16,
|
||||||
|
json_schema_extra={
|
||||||
|
"propertyNames": {"type": "string", "maxLength": 64},
|
||||||
|
"x-oaiTypeLabel": "map",
|
||||||
|
},
|
||||||
|
description=(
|
||||||
|
"Set of 16 key-value pairs that can be attached to an object. This can be "
|
||||||
|
"useful for storing additional information about the object in a structured "
|
||||||
|
"format, and querying for objects via API or the dashboard. Keys are strings "
|
||||||
|
"with a maximum length of 64 characters. Values are strings with a maximum "
|
||||||
|
"length of 512 characters, booleans, or numbers."
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
|
||||||
|
"""
|
||||||
|
Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
|
||||||
|
|
||||||
|
Converts dict[str, Any] to dict[str, str | float | bool]:
|
||||||
|
- Preserves: str (truncated to 512 chars), bool, int/float (as float)
|
||||||
|
- Converts: list -> comma-separated string
|
||||||
|
- Filters: dict, None, other types
|
||||||
|
- Enforces: max 16 properties, max 64 char keys, max 512 char string values
|
||||||
|
"""
|
||||||
|
if not metadata:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
sanitized: dict[str, str | float | bool] = {}
|
||||||
|
for key, value in metadata.items():
|
||||||
|
# Enforce max 16 properties
|
||||||
|
if len(sanitized) >= 16:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Enforce max 64 char keys
|
||||||
|
if len(key) > 64:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Convert to supported primitive types
|
||||||
|
if isinstance(value, bool):
|
||||||
|
sanitized[key] = value
|
||||||
|
elif isinstance(value, int | float):
|
||||||
|
sanitized[key] = float(value)
|
||||||
|
elif isinstance(value, str):
|
||||||
|
# Enforce max 512 char string values
|
||||||
|
sanitized[key] = value[:512] if len(value) > 512 else value
|
||||||
|
elif isinstance(value, list):
|
||||||
|
# Convert lists to comma-separated strings (max 512 chars)
|
||||||
|
list_str = ", ".join(str(item) for item in value)
|
||||||
|
sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
|
||||||
|
|
||||||
|
return sanitized
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class VectorStoreFileObject(BaseModel):
|
class VectorStoreFileObject(BaseModel):
|
||||||
"""OpenAI Vector Store File object.
|
"""OpenAI Vector Store File object.
|
||||||
|
|
@ -389,7 +448,7 @@ class VectorStoreFileObject(BaseModel):
|
||||||
|
|
||||||
id: str
|
id: str
|
||||||
object: str = "vector_store.file"
|
object: str = "vector_store.file"
|
||||||
attributes: dict[str, Any] = Field(default_factory=dict)
|
attributes: VectorStoreFileAttributes = Field(default_factory=dict)
|
||||||
chunking_strategy: VectorStoreChunkingStrategy
|
chunking_strategy: VectorStoreChunkingStrategy
|
||||||
created_at: int
|
created_at: int
|
||||||
last_error: VectorStoreFileLastError | None = None
|
last_error: VectorStoreFileLastError | None = None
|
||||||
|
|
@ -397,6 +456,12 @@ class VectorStoreFileObject(BaseModel):
|
||||||
usage_bytes: int = 0
|
usage_bytes: int = 0
|
||||||
vector_store_id: str
|
vector_store_id: str
|
||||||
|
|
||||||
|
@field_validator("attributes", mode="before")
|
||||||
|
@classmethod
|
||||||
|
def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
|
||||||
|
"""Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
|
||||||
|
return _sanitize_vector_store_attributes(v)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class VectorStoreListFilesResponse(BaseModel):
|
class VectorStoreListFilesResponse(BaseModel):
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,8 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.providers.utils.vector_io.vector_utils import (
|
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
|
||||||
generate_chunk_id,
|
from llama_stack_api import Chunk, ChunkMetadata, VectorStoreFileObject
|
||||||
sanitize_metadata_for_attributes,
|
|
||||||
)
|
|
||||||
from llama_stack_api import Chunk, ChunkMetadata
|
|
||||||
|
|
||||||
# This test is a unit test for the chunk_utils.py helpers. This should only contain
|
# This test is a unit test for the chunk_utils.py helpers. This should only contain
|
||||||
# tests which are specific to this file. More general (API-level) tests should be placed in
|
# tests which are specific to this file. More general (API-level) tests should be placed in
|
||||||
|
|
@ -83,25 +80,75 @@ def test_chunk_serialization():
|
||||||
assert "chunk_id" in serialized_chunk
|
assert "chunk_id" in serialized_chunk
|
||||||
|
|
||||||
|
|
||||||
def test_sanitize_metadata_for_attributes():
|
def test_vector_store_file_object_attributes_validation():
|
||||||
"""Test sanitization of metadata for VectorStoreSearchResponse.attributes."""
|
"""Test VectorStoreFileObject validates and sanitizes attributes at input boundary."""
|
||||||
# metadata with lists should be converted to strings
|
# Test with metadata containing lists, nested dicts, and primitives
|
||||||
metadata = {
|
from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto
|
||||||
"tags": ["transformers", "h100-compatible", "region:us"],
|
|
||||||
"model_name": "granite-3.3-8b",
|
file_obj = VectorStoreFileObject(
|
||||||
"score": 0.95,
|
id="file-123",
|
||||||
"active": True,
|
attributes={
|
||||||
"count": 42,
|
"tags": ["transformers", "h100-compatible", "region:us"], # List -> string
|
||||||
"nested": {"key": "value"}, # Should be filtered out
|
"model_name": "granite-3.3-8b", # String preserved
|
||||||
}
|
"score": 0.95, # Float preserved
|
||||||
result = sanitize_metadata_for_attributes(metadata)
|
"active": True, # Bool preserved
|
||||||
|
"count": 42, # Int -> float
|
||||||
|
"nested": {"key": "value"}, # Dict filtered out
|
||||||
|
},
|
||||||
|
chunking_strategy=VectorStoreChunkingStrategyAuto(),
|
||||||
|
created_at=1234567890,
|
||||||
|
status="completed",
|
||||||
|
vector_store_id="vs-123",
|
||||||
|
)
|
||||||
|
|
||||||
# Lists converted to comma-separated strings
|
# Lists converted to comma-separated strings
|
||||||
assert result["tags"] == "transformers, h100-compatible, region:us"
|
assert file_obj.attributes["tags"] == "transformers, h100-compatible, region:us"
|
||||||
# Primitives preserved
|
# Primitives preserved
|
||||||
assert result["model_name"] == "granite-3.3-8b"
|
assert file_obj.attributes["model_name"] == "granite-3.3-8b"
|
||||||
assert result["score"] == 0.95
|
assert file_obj.attributes["score"] == 0.95
|
||||||
assert result["active"] is True
|
assert file_obj.attributes["active"] is True
|
||||||
assert result["count"] == 42.0 # int -> float
|
assert file_obj.attributes["count"] == 42.0 # int -> float
|
||||||
# Complex types filtered out
|
# Complex types filtered out
|
||||||
assert "nested" not in result
|
assert "nested" not in file_obj.attributes
|
||||||
|
|
||||||
|
|
||||||
|
def test_vector_store_file_object_attributes_constraints():
|
||||||
|
"""Test VectorStoreFileObject enforces OpenAPI constraints on attributes."""
|
||||||
|
from llama_stack_api.vector_io import VectorStoreChunkingStrategyAuto
|
||||||
|
|
||||||
|
# Test max 16 properties
|
||||||
|
many_attrs = {f"key{i}": f"value{i}" for i in range(20)}
|
||||||
|
file_obj = VectorStoreFileObject(
|
||||||
|
id="file-123",
|
||||||
|
attributes=many_attrs,
|
||||||
|
chunking_strategy=VectorStoreChunkingStrategyAuto(),
|
||||||
|
created_at=1234567890,
|
||||||
|
status="completed",
|
||||||
|
vector_store_id="vs-123",
|
||||||
|
)
|
||||||
|
assert len(file_obj.attributes) == 16 # Max 16 properties
|
||||||
|
|
||||||
|
# Test max 64 char keys are filtered
|
||||||
|
long_key_attrs = {"a" * 65: "value", "valid_key": "value"}
|
||||||
|
file_obj = VectorStoreFileObject(
|
||||||
|
id="file-124",
|
||||||
|
attributes=long_key_attrs,
|
||||||
|
chunking_strategy=VectorStoreChunkingStrategyAuto(),
|
||||||
|
created_at=1234567890,
|
||||||
|
status="completed",
|
||||||
|
vector_store_id="vs-123",
|
||||||
|
)
|
||||||
|
assert "a" * 65 not in file_obj.attributes
|
||||||
|
assert "valid_key" in file_obj.attributes
|
||||||
|
|
||||||
|
# Test max 512 char string values are truncated
|
||||||
|
long_value_attrs = {"key": "x" * 600}
|
||||||
|
file_obj = VectorStoreFileObject(
|
||||||
|
id="file-125",
|
||||||
|
attributes=long_value_attrs,
|
||||||
|
chunking_strategy=VectorStoreChunkingStrategyAuto(),
|
||||||
|
created_at=1234567890,
|
||||||
|
status="completed",
|
||||||
|
vector_store_id="vs-123",
|
||||||
|
)
|
||||||
|
assert len(file_obj.attributes["key"]) == 512
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue