diff --git a/llama_stack/apis/preprocessing/preprocessing.py b/llama_stack/apis/preprocessing/preprocessing.py index 440486ef3..780d5ec40 100644 --- a/llama_stack/apis/preprocessing/preprocessing.py +++ b/llama_stack/apis/preprocessing/preprocessing.py @@ -8,8 +8,9 @@ from typing import Any, Dict, List, Optional, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.content_types import URL +from llama_stack.apis.common.content_types import URL, InterleavedContent from llama_stack.apis.preprocessing.preprocessors import Preprocessor +from llama_stack.apis.vector_io import Chunk from llama_stack.schema_utils import json_schema_type, webmethod @@ -28,16 +29,10 @@ class PreprocessingInput(BaseModel): PreprocessorOptions = Dict[str, Any] -@json_schema_type -class PreprocessingResult(BaseModel): - metadata: dict[str, Any] - data: str - - @json_schema_type class PreprocessingResponse(BaseModel): status: bool - results: Optional[List[str | PreprocessingResult]] + results: Optional[List[str | InterleavedContent | Chunk]] class PreprocessorStore(Protocol): diff --git a/llama_stack/providers/inline/preprocessing/docling/docling.py b/llama_stack/providers/inline/preprocessing/docling/docling.py index 3de4d9ed9..9305f7d8e 100644 --- a/llama_stack/providers/inline/preprocessing/docling/docling.py +++ b/llama_stack/providers/inline/preprocessing/docling/docling.py @@ -13,10 +13,10 @@ from llama_stack.apis.preprocessing import ( Preprocessing, PreprocessingInput, PreprocessingResponse, - PreprocessingResult, Preprocessor, PreprocessorOptions, ) +from llama_stack.apis.vector_io import Chunk from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConfig @@ -56,7 +56,7 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate converted_document = self.converter.convert(url).document if self.config.chunk: result = self.chunker.chunk(converted_document) - results.extend([PreprocessingResult(data=chunk.text, metadata=chunk.meta) for chunk in result]) + results.extend([Chunk(content=chunk.text, metadata=chunk.meta) for chunk in result]) else: result = converted_document.export_to_markdown() results.append(result)