diff --git a/llama_stack/providers/inline/preprocessing/docling/docling.py b/llama_stack/providers/inline/preprocessing/docling/docling.py
index 9db89806b..281c72b54 100644
--- a/llama_stack/providers/inline/preprocessing/docling/docling.py
+++ b/llama_stack/providers/inline/preprocessing/docling/docling.py
@@ -36,13 +36,10 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
 
     def __init__(self, config: InlineDoclingConfig) -> None:
         self.config = config
-        self.converter = DocumentConverter()
+        self.converter = None
         self.chunker = None
 
-    async def initialize(self) -> None:
-        if self.config.chunk:
-            # TODO: docling should use Llama Stack's inference API instead of handling tokenization by itself
-            self.chunker = HybridChunker()
+    async def initialize(self) -> None: ...
 
     async def shutdown(self) -> None: ...
 
@@ -56,6 +53,13 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
         preprocessor_inputs: List[PreprocessingDataElement],
         options: Optional[PreprocessorOptions] = None,
     ) -> PreprocessorResponse:
+        if self.converter is None:
+            # this is the first time this method is called
+            self.converter = DocumentConverter()
+        if self.config.chunk and self.chunker is None:
+            # TODO: docling should use Llama Stack's inference API instead of handling tokenization by itself
+            self.chunker = HybridChunker()
+
         results = []
         for inp in preprocessor_inputs:
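
For reference, a minimal standalone sketch of the lazy-initialization pattern this diff applies: the expensive dependency is no longer built in __init__ (or initialize), but on the first call to the processing method, and later calls reuse the cached instance. SlowConverter and LazyPreprocessor below are hypothetical stand-ins for illustration only; of the names above, only DocumentConverter, HybridChunker, and the config.chunk flag come from the diff itself.

from typing import Optional


class SlowConverter:
    """Hypothetical stand-in for an expensive dependency such as DocumentConverter."""

    def convert(self, text: str) -> str:
        return text.upper()


class LazyPreprocessor:
    def __init__(self) -> None:
        # Defer construction so creating the provider stays cheap.
        self.converter: Optional[SlowConverter] = None

    def preprocess(self, text: str) -> str:
        # Build the dependency on first use, then reuse the cached instance.
        if self.converter is None:
            self.converter = SlowConverter()
        return self.converter.convert(text)


p = LazyPreprocessor()
print(p.preprocess("first call builds the converter"))
print(p.preprocess("second call reuses it"))

One property of this pattern worth noting: the check-then-set is not atomic, so under true multi-threaded access two instances could be constructed. In a single-threaded asyncio setting like the method above it is safe, since there is no await between the None check and the assignment.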