Switched to using the existing Chunk type (from llama_stack.apis.vector_io) in place of the custom PreprocessingResult model.

This commit is contained in:
ilya-kolchinsky 2025-03-03 20:31:06 +01:00
parent c3515530bb
commit 224d2d2891
2 changed files with 5 additions and 10 deletions

View file

@ -8,8 +8,9 @@ from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.preprocessing.preprocessors import Preprocessor from llama_stack.apis.preprocessing.preprocessors import Preprocessor
from llama_stack.apis.vector_io import Chunk
from llama_stack.schema_utils import json_schema_type, webmethod from llama_stack.schema_utils import json_schema_type, webmethod
@ -28,16 +29,10 @@ class PreprocessingInput(BaseModel):
PreprocessorOptions = Dict[str, Any] PreprocessorOptions = Dict[str, Any]
@json_schema_type
class PreprocessingResult(BaseModel):
metadata: dict[str, Any]
data: str
@json_schema_type @json_schema_type
class PreprocessingResponse(BaseModel): class PreprocessingResponse(BaseModel):
status: bool status: bool
results: Optional[List[str | PreprocessingResult]] results: Optional[List[str | InterleavedContent | Chunk]]
class PreprocessorStore(Protocol): class PreprocessorStore(Protocol):

View file

@ -13,10 +13,10 @@ from llama_stack.apis.preprocessing import (
Preprocessing, Preprocessing,
PreprocessingInput, PreprocessingInput,
PreprocessingResponse, PreprocessingResponse,
PreprocessingResult,
Preprocessor, Preprocessor,
PreprocessorOptions, PreprocessorOptions,
) )
from llama_stack.apis.vector_io import Chunk
from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConfig from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConfig
@ -56,7 +56,7 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
converted_document = self.converter.convert(url).document converted_document = self.converter.convert(url).document
if self.config.chunk: if self.config.chunk:
result = self.chunker.chunk(converted_document) result = self.chunker.chunk(converted_document)
results.extend([PreprocessingResult(data=chunk.text, metadata=chunk.meta) for chunk in result]) results.extend([Chunk(content=chunk.text, metadata=chunk.meta) for chunk in result])
else: else:
result = converted_document.export_to_markdown() result = converted_document.export_to_markdown()
results.append(result) results.append(result)