Simplified the preprocessing interface.

2025-12-31 07:20:00 +00:00 · 2025-03-11 13:05:48 +01:00 · 2025-03-11 13:05:48 +01:00 · ad4cf97604
commit ad4cf97604
parent 1eeba2cc8a
8 changed files with 31 additions and 43 deletions
--- a/llama_stack/providers/inline/preprocessing/basic/basic.py
+++ b/llama_stack/providers/inline/preprocessing/basic/basic.py
@@ -52,7 +52,7 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):

    async def unregister_preprocessor(self, preprocessor_id: str) -> None: ...

-    async def preprocess(
+    async def do_preprocess(
        self,
        preprocessor_id: str,
        preprocessor_inputs: List[PreprocessingDataElement],
@@ -98,12 +98,12 @@ class InclineBasicPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
            success=True, output_data_type=PreprocessingDataType.raw_text_document, results=results
        )

-    async def chain_preprocess(
+    async def preprocess(
        self,
        preprocessors: PreprocessorChain,
        preprocessor_inputs: List[PreprocessingDataElement],
    ) -> PreprocessorResponse:
-        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
+        return await self.do_preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)

    @staticmethod
    def _resolve_input_type(preprocessor_input: PreprocessingDataElement) -> PreprocessingDataType:
--- a/llama_stack/providers/inline/preprocessing/docling/docling.py
+++ b/llama_stack/providers/inline/preprocessing/docling/docling.py
@@ -47,7 +47,7 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate

    async def unregister_preprocessor(self, preprocessor_id: str) -> None: ...

-    async def preprocess(
+    async def do_preprocess(
        self,
        preprocessor_id: str,
        preprocessor_inputs: List[PreprocessingDataElement],
@@ -106,9 +106,9 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
        )
        return PreprocessorResponse(success=True, output_data_type=output_data_type, results=results)

-    async def chain_preprocess(
+    async def preprocess(
        self,
        preprocessors: PreprocessorChain,
        preprocessor_inputs: List[PreprocessingDataElement],
    ) -> PreprocessorResponse:
-        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
+        return await self.do_preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
--- a/llama_stack/providers/inline/preprocessing/simple_chunking/simple_chunking.py
+++ b/llama_stack/providers/inline/preprocessing/simple_chunking/simple_chunking.py
@@ -47,7 +47,7 @@ class InclineSimpleChunkingImpl(Preprocessing, PreprocessorsProtocolPrivate):

    async def unregister_preprocessor(self, preprocessor_id: str) -> None: ...

-    async def preprocess(
+    async def do_preprocess(
        self,
        preprocessor_id: str,
        preprocessor_inputs: List[PreprocessingDataElement],
@@ -72,12 +72,12 @@ class InclineSimpleChunkingImpl(Preprocessing, PreprocessorsProtocolPrivate):

        return PreprocessorResponse(success=True, output_data_type=PreprocessingDataType.chunks, results=chunks)

-    async def chain_preprocess(
+    async def preprocess(
        self,
        preprocessors: PreprocessorChain,
        preprocessor_inputs: List[PreprocessingDataElement],
    ) -> PreprocessorResponse:
-        return await self.preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)
+        return await self.do_preprocess(preprocessor_id="", preprocessor_inputs=preprocessor_inputs)

    def _resolve_chunk_size_params(self, options: PreprocessorOptions) -> Tuple[int, int]:
        window_len = (options or {}).get(
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -81,7 +81,7 @@ class MemoryToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime, RAGToolRuntime):
        preprocessor_chain: Optional[PreprocessorChain] = None,
    ) -> None:
        preprocessor_inputs = [self._rag_document_to_preprocessor_input(d) for d in documents]
-        preprocessor_response = await self.preprocessing_api.chain_preprocess(
+        preprocessor_response = await self.preprocessing_api.preprocess(
            preprocessors=preprocessor_chain or self.DEFAULT_PREPROCESSING_CHAIN,
            preprocessor_inputs=preprocessor_inputs,
        )