Added a draft implementation of the preprocessor chain.

This commit is contained in:
ilya-kolchinsky 2025-03-05 17:17:17 +01:00
parent 16764a2f06
commit b981181b25
7 changed files with 180 additions and 46 deletions

View file

@ -34,7 +34,13 @@ from llama_stack.apis.inference import (
ToolPromptFormat,
)
from llama_stack.apis.models import ModelType
from llama_stack.apis.preprocessing import Preprocessing, PreprocessingInput, PreprocessingResponse, PreprocessorOptions
from llama_stack.apis.preprocessing import (
Preprocessing,
PreprocessorChain,
PreprocessorInput,
PreprocessorOptions,
PreprocessorResponse,
)
from llama_stack.apis.safety import RunShieldResponse, Safety
from llama_stack.apis.scoring import (
ScoreBatchResponse,
@ -52,6 +58,7 @@ from llama_stack.apis.tools import (
ToolRuntime,
)
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
from llama_stack.distribution.utils.chain import execute_preprocessor_chain
from llama_stack.providers.datatypes import RoutingTable
from llama_stack.providers.utils.inference.prompt_adapter import get_default_tool_prompt_format
@ -501,11 +508,20 @@ class PreprocessingRouter(Preprocessing):
async def preprocess(
    self,
    preprocessor_id: str,
    preprocessor_inputs: List[PreprocessorInput],
    options: Optional[PreprocessorOptions] = None,
) -> PreprocessorResponse:
    """Forward a preprocessing request to the provider behind `preprocessor_id`.

    Args:
        preprocessor_id: Identifier used to look up the provider
            implementation in the routing table; also passed through to it.
        preprocessor_inputs: Inputs handed unchanged to the provider.
        options: Optional preprocessor options, passed through as-is.

    Returns:
        Whatever the provider's own `preprocess` call returns.
    """
    # The router does no work of its own: it only resolves the implementation
    # and delegates with the same keyword arguments.
    provider_impl = self.routing_table.get_provider_impl(preprocessor_id)
    return await provider_impl.preprocess(
        preprocessor_id=preprocessor_id,
        preprocessor_inputs=preprocessor_inputs,
        options=options,
    )
async def chain_preprocess(
    self,
    preprocessors: PreprocessorChain,
    preprocessor_inputs: List[PreprocessorInput],
    is_rag_chain: Optional[bool] = False,
) -> PreprocessorResponse:
    """Run `preprocessor_inputs` through a chain of preprocessors.

    Args:
        preprocessors: Chain entries; each entry's `preprocessor_id` is used
            to look up its provider implementation in the routing table.
        preprocessor_inputs: Inputs passed to the chain executor.
        is_rag_chain: Forwarded unchanged to `execute_preprocessor_chain`.

    Returns:
        The response produced by `execute_preprocessor_chain`.
    """
    # Resolve one implementation per chain entry, preserving chain order so
    # that entry i lines up with implementation i in the executor.
    chain_impls = []
    for step in preprocessors:
        chain_impls.append(self.routing_table.get_provider_impl(step.preprocessor_id))

    return await execute_preprocessor_chain(preprocessors, chain_impls, preprocessor_inputs, is_rag_chain)

View file

@ -0,0 +1,71 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import logging
from itertools import pairwise
from typing import List
from llama_stack.apis.preprocessing import (
Preprocessing,
PreprocessingDataType,
PreprocessorChain,
PreprocessorInput,
PreprocessorResponse,
)
log = logging.getLogger(__name__)
def validate_chain(chain_impls: List[Preprocessing], is_rag_chain: bool) -> bool:
    """Check that a preprocessor chain is non-empty and type-compatible.

    Args:
        chain_impls: Preprocessor implementations in execution order. Their
            `input_types` and `output_types` attributes are inspected.
        is_rag_chain: When true, the last preprocessor must list
            `PreprocessingDataType.chunks` among its output types.

    Returns:
        True when every check passes, False otherwise. Failures are logged at
        error level; nothing is raised by the checks themselves.
    """
    if not chain_impls:
        log.error("Empty preprocessing chain was provided")
        return False

    if is_rag_chain and PreprocessingDataType.chunks not in chain_impls[-1].output_types:
        log.error(
            f"RAG preprocessing chain must end with a chunk-producing preprocessor, but the last preprocessor in the provided chain only supports {chain_impls[-1].output_types}"
        )
        return False

    # Each adjacent pair must share at least one data type: something the
    # earlier step can produce that the later step can consume.
    for current_preprocessor, next_preprocessor in pairwise(chain_impls):
        current_output_types = current_preprocessor.output_types
        next_input_types = next_preprocessor.input_types
        if set(current_output_types).isdisjoint(next_input_types):
            # Labels match the values: the earlier step's *output* types
            # versus the later step's *input* types.
            log.error(
                f"Incompatible output ({current_output_types}) and input ({next_input_types}) preprocessor data types"
            )
            return False

    return True
async def execute_preprocessor_chain(
    preprocessor_chain: PreprocessorChain,
    preprocessor_chain_impls: List[Preprocessing],
    preprocessor_inputs: List[PreprocessorInput],
    is_rag_chain: bool,
) -> PreprocessorResponse:
    """Run the inputs through each preprocessor in sequence.

    Args:
        preprocessor_chain: Chain entries supplying `preprocessor_id` and
            `options` for each stage.
        preprocessor_chain_impls: Implementations, looked up by the same
            position as the corresponding chain entry.
        preprocessor_inputs: Inputs for the first stage.
        is_rag_chain: Passed to `validate_chain`.

    Returns:
        A response with `status=True` and the last stage's results on
        success, or `status=False` with empty results if validation fails or
        any stage reports a falsy status.
    """
    chain_is_valid = validate_chain(preprocessor_chain_impls, is_rag_chain)
    if not chain_is_valid:
        return PreprocessorResponse(status=False, results=[])

    stage_inputs = preprocessor_inputs
    stage_outputs = []

    # TODO: replace with a parallel implementation
    for index, stage in enumerate(preprocessor_chain):
        stage_response = await preprocessor_chain_impls[index].preprocess(
            preprocessor_id=stage.preprocessor_id,
            preprocessor_inputs=stage_inputs,
            options=stage.options,
        )
        if stage_response.status:
            # This stage's results become the next stage's inputs.
            stage_outputs = stage_response.results
            stage_inputs = stage_outputs
            continue

        # First failing stage aborts the whole chain.
        log.error(f"Preprocessor {stage.preprocessor_id} returned an error")
        return PreprocessorResponse(status=False, results=[])

    return PreprocessorResponse(status=True, results=stage_outputs)