Switched to using the existing Chunk type.

2025-08-07 19:12:09 +00:00 · 2025-03-03 20:31:06 +01:00 · 2025-03-03 20:31:06 +01:00 · 224d2d2891
commit 224d2d2891
parent c3515530bb
2 changed files with 5 additions and 10 deletions
--- a/llama_stack/apis/preprocessing/preprocessing.py
+++ b/llama_stack/apis/preprocessing/preprocessing.py
@@ -8,8 +8,9 @@ from typing import Any, Dict, List, Optional, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.preprocessing.preprocessors import Preprocessor
+from llama_stack.apis.vector_io import Chunk
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -28,16 +29,10 @@ class PreprocessingInput(BaseModel):
 PreprocessorOptions = Dict[str, Any]


-@json_schema_type
-class PreprocessingResult(BaseModel):
-    metadata: dict[str, Any]
-    data: str
-
-
 @json_schema_type
 class PreprocessingResponse(BaseModel):
    status: bool
-    results: Optional[List[str | PreprocessingResult]]
+    results: Optional[List[str | InterleavedContent | Chunk]]


 class PreprocessorStore(Protocol):
--- a/llama_stack/providers/inline/preprocessing/docling/docling.py
+++ b/llama_stack/providers/inline/preprocessing/docling/docling.py
@@ -13,10 +13,10 @@ from llama_stack.apis.preprocessing import (
    Preprocessing,
    PreprocessingInput,
    PreprocessingResponse,
-    PreprocessingResult,
    Preprocessor,
    PreprocessorOptions,
 )
+from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
 from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConfig

@@ -56,7 +56,7 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
            converted_document = self.converter.convert(url).document
            if self.config.chunk:
                result = self.chunker.chunk(converted_document)
-                results.extend([PreprocessingResult(data=chunk.text, metadata=chunk.meta) for chunk in result])
+                results.extend([Chunk(content=chunk.text, metadata=chunk.meta) for chunk in result])
            else:
                result = converted_document.export_to_markdown()
                results.append(result)