Added a draft implementation of the preprocessor chain.

ilya-kolchinsky 2025-03-05 17:17:17 +01:00
parent 16764a2f06
commit b981181b25
7 changed files with 180 additions and 46 deletions
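
For orientation, here is a minimal caller-side sketch of the chain API this commit introduces. `PreprocessorChainElement`, `PreprocessorInput`, and `chain_preprocess` are taken from the diffs below; the `preprocessing_api` handle and the concrete field values are assumptions for illustration, not part of the commit.

# Hypothetical caller-side sketch of the new chain API (not part of this
# commit). `preprocessing_api` is assumed to be any object implementing the
# Preprocessing protocol shown in the diffs below.
from llama_stack.apis.preprocessing import PreprocessorChainElement, PreprocessorInput

async def convert_and_chunk(preprocessing_api, document_url: str):
    chain = [
        PreprocessorChainElement(preprocessor_id="builtin::basic"),     # fetch / convert to text
        PreprocessorChainElement(preprocessor_id="builtin::chunking"),  # split text into chunks
    ]
    inputs = [PreprocessorInput(preprocessor_input_id="doc-0", path_or_content=document_url)]
    response = await preprocessing_api.chain_preprocess(
        preprocessors=chain,
        preprocessor_inputs=inputs,
    )
    return response.results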

View file

@@ -14,10 +14,11 @@ from llama_stack.apis.preprocessing import (
     Preprocessing,
     PreprocessingDataFormat,
     PreprocessingDataType,
-    PreprocessingInput,
-    PreprocessingResponse,
     Preprocessor,
+    PreprocessorChain,
+    PreprocessorInput,
     PreprocessorOptions,
+    PreprocessorResponse,
 )
 from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
 from llama_stack.providers.inline.preprocessing.basic.config import InlineBasicPreprocessorConfig
@@ -29,14 +30,14 @@ log = logging.getLogger(__name__)
 
 
 class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
     # this preprocessor can either receive documents (text or binary) or document URIs
-    INPUT_TYPES = [
+    input_types = [
         PreprocessingDataType.binary_document,
         PreprocessingDataType.raw_text_document,
         PreprocessingDataType.document_uri,
     ]
     # this preprocessor optionally retrieves the documents and converts them into plain text
-    OUTPUT_TYPES = [PreprocessingDataType.raw_text_document]
+    output_types = [PreprocessingDataType.raw_text_document]
 
     URL_VALIDATION_PATTERN = re.compile("^(https?://|file://|data:)")
@@ -54,9 +55,9 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
     async def preprocess(
         self,
         preprocessor_id: str,
-        preprocessor_inputs: List[PreprocessingInput],
+        preprocessor_inputs: List[PreprocessorInput],
         options: Optional[PreprocessorOptions] = None,
-    ) -> PreprocessingResponse:
+    ) -> PreprocessorResponse:
         results = []
 
         for inp in preprocessor_inputs:
@@ -87,10 +88,18 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
             results.append(document)
 
-        return PreprocessingResponse(status=True, results=results)
+        return PreprocessorResponse(status=True, results=results)
+
+    async def chain_preprocess(
+        self,
+        preprocessors: PreprocessorChain,
+        preprocessor_inputs: List[PreprocessorInput],
+        is_rag_chain: Optional[bool] = False,
+    ) -> PreprocessorResponse:
+        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
 
     @staticmethod
-    async def _resolve_input_type(preprocessor_input: PreprocessingInput) -> PreprocessingDataType:
+    async def _resolve_input_type(preprocessor_input: PreprocessorInput) -> PreprocessingDataType:
         if preprocessor_input.preprocessor_input_type is not None:
             return preprocessor_input.preprocessor_input_type
@@ -104,7 +113,7 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
         return PreprocessingDataType.raw_text_document
 
     @staticmethod
-    async def _fetch_document(preprocessor_input: PreprocessingInput) -> str | None:
+    async def _fetch_document(preprocessor_input: PreprocessorInput) -> str | None:
         if isinstance(preprocessor_input.path_or_content, str):
             url = preprocessor_input.path_or_content
             if not InclineBasicPreprocessorImpl.URL_VALIDATION_PATTERN.match(url):
@@ -125,7 +134,3 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
             r = await client.get(url)
             return r.content if preprocessor_input.preprocessor_input_format == PreprocessingDataFormat.pdf else r.text
-
-    @staticmethod
-    def is_pdf(preprocessor_input: PreprocessingInput):
-        return
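
The `input_types` / `output_types` declarations above describe what each provider consumes and produces, which is presumably what a chain planner would use to check that adjacent stages are compatible. No such validator is part of this commit; a hypothetical sketch, assuming a `registry` mapping preprocessor ids to their implementations:

# Hypothetical chain validation (not in this commit): each stage must produce
# at least one data type that the next stage accepts.
def validate_chain(registry: dict, chain: list) -> None:
    for prev, nxt in zip(chain, chain[1:]):
        produced = set(registry[prev.preprocessor_id].output_types)
        accepted = set(registry[nxt.preprocessor_id].input_types)
        if not produced & accepted:
            raise ValueError(
                f"{prev.preprocessor_id} produces {produced}, "
                f"but {nxt.preprocessor_id} only accepts {accepted}"
            )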

View file

@@ -13,10 +13,11 @@ from llama_stack.apis.common.content_types import URL
 from llama_stack.apis.preprocessing import (
     Preprocessing,
     PreprocessingDataType,
-    PreprocessingInput,
-    PreprocessingResponse,
     Preprocessor,
+    PreprocessorChain,
+    PreprocessorInput,
     PreprocessorOptions,
+    PreprocessorResponse,
 )
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
@@ -27,10 +28,10 @@ log = logging.getLogger(__name__)
 
 class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
     # this preprocessor receives URLs / paths to documents as input
-    INPUT_TYPES = [PreprocessingDataType.document_uri]
+    input_types = [PreprocessingDataType.document_uri]
     # this preprocessor either only converts the documents into a text format, or also chunks them
-    OUTPUT_TYPES = [PreprocessingDataType.raw_text_document, PreprocessingDataType.chunks]
+    output_types = [PreprocessingDataType.raw_text_document, PreprocessingDataType.chunks]
 
     def __init__(self, config: InlineDoclingConfig) -> None:
         self.config = config
@@ -50,9 +51,9 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
     async def preprocess(
         self,
         preprocessor_id: str,
-        preprocessor_inputs: List[PreprocessingInput],
+        preprocessor_inputs: List[PreprocessorInput],
         options: Optional[PreprocessorOptions] = None,
-    ) -> PreprocessingResponse:
+    ) -> PreprocessorResponse:
         results = []
 
         for inp in preprocessor_inputs:
@@ -74,4 +75,12 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
                 result = converted_document.export_to_markdown()
             results.append(result)
 
-        return PreprocessingResponse(status=True, results=results)
+        return PreprocessorResponse(status=True, results=results)
+
+    async def chain_preprocess(
+        self,
+        preprocessors: PreprocessorChain,
+        preprocessor_inputs: List[PreprocessorInput],
+        is_rag_chain: Optional[bool] = False,
+    ) -> PreprocessorResponse:
+        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
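
The conversion path above wraps Docling; for reference, the standalone equivalent looks roughly like the snippet below. The exact converter options the provider passes are not visible in this hunk, so treat this as an approximation.

# Approximate standalone equivalent of the provider's conversion-only path.
from docling.document_converter import DocumentConverter

converter = DocumentConverter()
result = converter.convert("https://arxiv.org/pdf/2408.09869")  # URL or local path
markdown = result.document.export_to_markdown()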

View file

@@ -12,10 +12,11 @@ from llama_models.llama3.api import Tokenizer
 from llama_stack.apis.preprocessing import (
     Preprocessing,
     PreprocessingDataType,
-    PreprocessingInput,
-    PreprocessingResponse,
     Preprocessor,
+    PreprocessorChain,
+    PreprocessorInput,
     PreprocessorOptions,
+    PreprocessorResponse,
 )
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
@@ -31,8 +32,8 @@ class SimpleChunkingOptions(Enum):
 
 class InclineSimpleChunkingImpl(Preprocessing, PreprocessorsProtocolPrivate):
     # this preprocessor receives plain text and returns chunks
-    INPUT_TYPES = [PreprocessingDataType.raw_text_document]
-    OUTPUT_TYPES = [PreprocessingDataType.chunks]
+    input_types = [PreprocessingDataType.raw_text_document]
+    output_types = [PreprocessingDataType.chunks]
 
     def __init__(self, config: InclineSimpleChunkingConfig) -> None:
         self.config = config
@@ -48,9 +49,9 @@ class InclineSimpleChunkingImpl(Preprocessing, PreprocessorsProtocolPrivate):
     async def preprocess(
         self,
         preprocessor_id: str,
-        preprocessor_inputs: List[PreprocessingInput],
+        preprocessor_inputs: List[PreprocessorInput],
         options: Optional[PreprocessorOptions] = None,
-    ) -> PreprocessingResponse:
+    ) -> PreprocessorResponse:
         chunks = []
 
         window_len, overlap_len = self._resolve_chunk_size_params(options)
@@ -61,7 +62,15 @@ class InclineSimpleChunkingImpl(Preprocessing, PreprocessorsProtocolPrivate):
             )
             chunks.extend(new_chunks)
 
-        return PreprocessingResponse(status=True, results=chunks)
+        return PreprocessorResponse(status=True, results=chunks)
+
+    async def chain_preprocess(
+        self,
+        preprocessors: PreprocessorChain,
+        preprocessor_inputs: List[PreprocessorInput],
+        is_rag_chain: Optional[bool] = False,
+    ) -> PreprocessorResponse:
+        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
 
     def _resolve_chunk_size_params(self, options: PreprocessorOptions) -> Tuple[int, int]:
         window_len = (options or {}).get(
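
`_resolve_chunk_size_params` resolves a window length and an overlap length, which implies a sliding-window split over tokens. The actual splitting code is outside this hunk; a plausible sketch of the technique, with the tokenizer round-trip passed in as plain callables (the real llama-stack helper may differ):

# Plausible sketch of overlapping-window chunking. Consecutive windows share
# overlap_len tokens; assumes overlap_len < window_len.
def overlapped_chunks(text: str, tokenize, detokenize, window_len: int, overlap_len: int) -> list:
    tokens = tokenize(text)
    step = window_len - overlap_len
    chunks = []
    for start in range(0, len(tokens), step):
        chunks.append(detokenize(tokens[start : start + window_len]))
        if start + window_len >= len(tokens):
            break
    return chunks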

View file

@@ -18,7 +18,12 @@ from llama_stack.apis.common.content_types import (
     TextContentItem,
 )
 from llama_stack.apis.inference import Inference
-from llama_stack.apis.preprocessing import Preprocessing, PreprocessingDataFormat, PreprocessingInput
+from llama_stack.apis.preprocessing import (
+    Preprocessing,
+    PreprocessingDataFormat,
+    PreprocessorChainElement,
+    PreprocessorInput,
+)
 from llama_stack.apis.tools import (
     RAGDocument,
     RAGQueryConfig,
@@ -67,17 +72,16 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         vector_db_id: str,
         chunk_size_in_tokens: int = 512,
     ) -> None:
-        preprocessing_inputs = [self._rag_document_to_preprocessing_input(d) for d in documents]
-        conversion_response = await self.preprocessing_api.preprocess(
-            preprocessor_id="builtin::basic", preprocessor_inputs=preprocessing_inputs
+        preprocessor_inputs = [self._rag_document_to_preprocessor_input(d) for d in documents]
+        preprocessor_chain = [
+            PreprocessorChainElement(preprocessor_id="builtin::basic"),
+            PreprocessorChainElement(preprocessor_id="builtin::chunking"),
+        ]
+        preprocessor_response = await self.preprocessing_api.chain_preprocess(
+            preprocessors=preprocessor_chain, preprocessor_inputs=preprocessor_inputs
         )
-        converted_inputs = conversion_response.results
-        chunking_response = await self.preprocessing_api.preprocess(
-            preprocessor_id="builtin::chunking", preprocessor_inputs=converted_inputs
-        )
-        chunks = chunking_response.results
+        chunks = preprocessor_response.results
 
         if not chunks:
             return
@@ -197,13 +201,13 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
         )
 
     @staticmethod
-    def _rag_document_to_preprocessing_input(document: RAGDocument) -> PreprocessingInput:
+    def _rag_document_to_preprocessor_input(document: RAGDocument) -> PreprocessorInput:
         if document.mime_type == "application/pdf":
             preprocessor_input_format = PreprocessingDataFormat.pdf
         else:
             preprocessor_input_format = None
 
-        return PreprocessingInput(
+        return PreprocessorInput(
             preprocessor_input_id=document.document_id,
             preprocessor_input_format=preprocessor_input_format,
             path_or_content=document.content,
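
Every `chain_preprocess` implementation in this commit is still a stub that ignores its `preprocessors` and `is_rag_chain` arguments and delegates to a single `preprocess` call, so actually executing the chain is presumably left for a follow-up. A hypothetical sketch of what that execution loop might look like:

# Hypothetical chain executor (not in this commit). Feeds each stage's results
# into the next stage; converting results back into PreprocessorInput objects
# between stages is elided here.
async def run_chain(registry: dict, chain: list, inputs: list):
    response = None
    current = inputs
    for element in chain:
        impl = registry[element.preprocessor_id]
        response = await impl.preprocess(
            preprocessor_id=element.preprocessor_id,
            preprocessor_inputs=current,
        )
        if not response.status:
            return response  # abort on the first failing stage
        current = response.results
    return response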