mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-02 16:50:01 +00:00
Added draft implementation of built-in preprocessing for RAG.
This commit is contained in:
parent
5014de434e
commit
1a6e71c61f
9 changed files with 299 additions and 4 deletions
|
|
@@ -3,6 +3,7 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
|
@@ -21,6 +22,8 @@ from llama_stack.apis.vector_io import Chunk
|
|||
from llama_stack.providers.datatypes import PreprocessorsProtocolPrivate
|
||||
from llama_stack.providers.inline.preprocessing.docling import InlineDoclingConfig
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate):
|
||||
# this preprocessor receives URLs / paths to documents as input
|
||||
|
|
@@ -58,7 +61,10 @@ class InclineDoclingPreprocessorImpl(Preprocessing, PreprocessorsProtocolPrivate
|
|||
elif isinstance(inp.path_or_content, URL):
|
||||
url = inp.path_or_content.uri
|
||||
else:
|
||||
-raise ValueError(f"Unexpected type {type(inp.path_or_content)} for input {inp.path_or_content}")
|
||||
+log.error(
|
||||
+f"Unexpected type {type(inp.path_or_content)} for input {inp.path_or_content}, skipping this input."
|
||||
+)
|
||||
+continue
|
||||
|
||||
converted_document = self.converter.convert(url).document
|
||||
if self.config.chunk:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue