Added a draft implementation of the preprocessor chain.

2026-01-05 09:42:24 +00:00 · 2025-03-05 17:17:17 +01:00 · 2025-03-05 17:17:17 +01:00 · b981181b25
commit b981181b25
parent 16764a2f06
7 changed files with 180 additions and 46 deletions
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@ -34,7 +34,13 @@ from llama_stack.apis.inference import (
    ToolPromptFormat,
 )
 from llama_stack.apis.models import ModelType
-from llama_stack.apis.preprocessing import Preprocessing, PreprocessingInput, PreprocessingResponse, PreprocessorOptions
+from llama_stack.apis.preprocessing import (
+    Preprocessing,
+    PreprocessorChain,
+    PreprocessorInput,
+    PreprocessorOptions,
+    PreprocessorResponse,
+)
 from llama_stack.apis.safety import RunShieldResponse, Safety
 from llama_stack.apis.scoring import (
    ScoreBatchResponse,
@ -52,6 +58,7 @@ from llama_stack.apis.tools import (
    ToolRuntime,
 )
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
+from llama_stack.distribution.utils.chain import execute_preprocessor_chain
 from llama_stack.providers.datatypes import RoutingTable
 from llama_stack.providers.utils.inference.prompt_adapter import get_default_tool_prompt_format

@ -501,11 +508,20 @@ class PreprocessingRouter(Preprocessing):
    async def preprocess(
        self,
        preprocessor_id: str,
-        preprocessor_inputs: List[PreprocessingInput],
+        preprocessor_inputs: List[PreprocessorInput],
        options: Optional[PreprocessorOptions] = None,
-    ) -> PreprocessingResponse:
+    ) -> PreprocessorResponse:
        return await self.routing_table.get_provider_impl(preprocessor_id).preprocess(
            preprocessor_id=preprocessor_id,
            preprocessor_inputs=preprocessor_inputs,
            options=options,
        )
+
+    async def chain_preprocess(
+        self,
+        preprocessors: PreprocessorChain,
+        preprocessor_inputs: List[PreprocessorInput],
+        is_rag_chain: Optional[bool] = False,
+    ) -> PreprocessorResponse:
+        """Look up the provider implementation of every chain entry and run the chain.
+
+        Each entry's ``preprocessor_id`` is resolved through the routing table; the
+        resulting implementations are passed, in the same order as the chain entries,
+        to ``execute_preprocessor_chain`` together with the inputs and the
+        ``is_rag_chain`` flag. Its response is returned unchanged.
+        """
+        chain_impls = [
+            self.routing_table.get_provider_impl(chain_entry.preprocessor_id)
+            for chain_entry in preprocessors
+        ]
+        return await execute_preprocessor_chain(
+            preprocessor_chain=preprocessors,
+            preprocessor_chain_impls=chain_impls,
+            preprocessor_inputs=preprocessor_inputs,
+            is_rag_chain=is_rag_chain,
+        )
--- a/llama_stack/distribution/utils/chain.py
+++ b/llama_stack/distribution/utils/chain.py
@ -0,0 +1,71 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import logging
+from itertools import pairwise
+from typing import List
+
+from llama_stack.apis.preprocessing import (
+    Preprocessing,
+    PreprocessingDataType,
+    PreprocessorChain,
+    PreprocessorInput,
+    PreprocessorResponse,
+)
+
+log = logging.getLogger(__name__)
+
+
+def validate_chain(chain_impls: List[Preprocessing], is_rag_chain: bool) -> bool:
+    """Check that a chain of preprocessor implementations can be executed.
+
+    The chain is rejected when it is empty, when ``is_rag_chain`` is set and the last
+    preprocessor does not list ``PreprocessingDataType.chunks`` among its output types,
+    or when two adjacent preprocessors have no output/input data type in common.
+    The reason for a rejection is logged as an error and reported by the return value.
+
+    :param chain_impls: Preprocessor implementations in execution order.
+    :param is_rag_chain: Whether the chain must end with a chunk-producing preprocessor.
+    :returns: True if the chain passed all checks, False otherwise.
+    """
+    if not chain_impls:
+        log.error("Empty preprocessing chain was provided")
+        return False
+
+    if is_rag_chain and PreprocessingDataType.chunks not in chain_impls[-1].output_types:
+        log.error(
+            f"RAG preprocessing chain must end with a chunk-producing preprocessor, but the last preprocessor in the provided chain only supports {chain_impls[-1].output_types}"
+        )
+        return False
+
+    for current_preprocessor, next_preprocessor in pairwise(chain_impls):
+        current_output_types = current_preprocessor.output_types
+        next_input_types = next_preprocessor.input_types
+
+        # Adjacent steps need at least one data type in common: an output type of
+        # the current preprocessor that is also an input type of the next one.
+        if set(current_output_types).isdisjoint(next_input_types):
+            log.error(
+                f"Incompatible output ({current_output_types}) and input ({next_input_types}) preprocessor data types"
+            )
+            return False
+
+    return True
+
+
+async def execute_preprocessor_chain(
+    preprocessor_chain: PreprocessorChain,
+    preprocessor_chain_impls: List[Preprocessing],
+    preprocessor_inputs: List[PreprocessorInput],
+    is_rag_chain: bool,
+) -> PreprocessorResponse:
+    """Run the preprocessors one after another, feeding each one's results to the next.
+
+    The implementations are first checked with ``validate_chain``. Entry ``i`` of
+    ``preprocessor_chain`` supplies the id and options for implementation ``i`` of
+    ``preprocessor_chain_impls``.
+
+    :returns: ``status=True`` with the results of the last preprocessor, or
+        ``status=False`` with empty results if validation fails or any preprocessor
+        responds with a false status.
+    """
+    chain_is_valid = validate_chain(preprocessor_chain_impls, is_rag_chain)
+    if not chain_is_valid:
+        return PreprocessorResponse(status=False, results=[])
+
+    # step_inputs feeds the next step; final_results stays empty until a step completes.
+    step_inputs, final_results = preprocessor_inputs, []
+
+    # TODO: replace with a parallel implementation
+    for position, step in enumerate(preprocessor_chain):
+        step_response = await preprocessor_chain_impls[position].preprocess(
+            preprocessor_id=step.preprocessor_id,
+            preprocessor_inputs=step_inputs,
+            options=step.options,
+        )
+        if not step_response.status:
+            log.error(f"Preprocessor {step.preprocessor_id} returned an error")
+            return PreprocessorResponse(status=False, results=[])
+        step_inputs = final_results = step_response.results
+
+    return PreprocessorResponse(status=True, results=final_results)