Added input/output type declaration.

This commit is contained in:
ilya-kolchinsky 2025-03-04 12:34:30 +01:00
parent 224d2d2891
commit 5014de434e
2 changed files with 15 additions and 4 deletions

View file

@@ -14,15 +14,19 @@ from llama_stack.apis.vector_io import Chunk
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
class PreprocessingInputType(Enum): class PreprocessingDataType(Enum):
document_content = "document_content" document_uri = "document_uri"
document_path = "document_path" document_directory_uri = "document_directory_uri"
binary_document = "binary_document"
raw_text_document = "raw_text_document"
chunks = "chunks"
@json_schema_type @json_schema_type
class PreprocessingInput(BaseModel): class PreprocessingInput(BaseModel):
preprocessor_input_id: str preprocessor_input_id: str
preprocessor_input_type: Optional[PreprocessingInputType] preprocessor_input_type: Optional[PreprocessingDataType]
path_or_content: str | URL path_or_content: str | URL

View file

@@ -11,6 +11,7 @@ from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL
from llama_stack.apis.preprocessing import ( from llama_stack.apis.preprocessing import (
Preprocessing, Preprocessing,
PreprocessingDataType,
PreprocessingInput, PreprocessingInput,
PreprocessingResponse, PreprocessingResponse,
Preprocessor, Preprocessor,
@@ -22,6 +23,12 @@ from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConf
class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate): class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
# this preprocessor receives URLs / paths to documents as input
INPUT_TYPES = [PreprocessingDataType.document_uri]
# this preprocessor either only converts the documents into a text format, or also chunks them
OUTPUT_TYPES = [PreprocessingDataType.raw_text_document, PreprocessingDataType.chunks]
def __init__(self, config: InlineDoclingConfig) -> None: def __init__(self, config: InlineDoclingConfig) -> None:
self.config = config self.config = config
self.converter = DocumentConverter() self.converter = DocumentConverter()