fix: Import chardet and pypdf only when actually needed

These dependencies are needed only by the inline::rag-runtime provider, which
already declares them in its pip_dependencies, so import them lazily at the point of use.

Resolves: #1432

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
Ihar Hrachyshka 2025-03-06 10:25:24 -05:00
parent 82e94fe22f
commit fcb52fa3a4

View file

@ -12,11 +12,9 @@ from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from urllib.parse import unquote from urllib.parse import unquote
import chardet
import httpx import httpx
import numpy as np import numpy as np
from numpy.typing import NDArray from numpy.typing import NDArray
from pypdf import PdfReader
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
URL, URL,
@ -38,6 +36,8 @@ log = logging.getLogger(__name__)
def parse_pdf(data: bytes) -> str: def parse_pdf(data: bytes) -> str:
# For PDF and DOC/DOCX files, we can't reliably convert to string # For PDF and DOC/DOCX files, we can't reliably convert to string
pdf_bytes = io.BytesIO(data) pdf_bytes = io.BytesIO(data)
from pypdf import PdfReader
pdf_reader = PdfReader(pdf_bytes) pdf_reader = PdfReader(pdf_bytes)
return "\n".join([page.extract_text() for page in pdf_reader.pages]) return "\n".join([page.extract_text() for page in pdf_reader.pages])
@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str:
encoding = parts["encoding"] encoding = parts["encoding"]
if not encoding: if not encoding:
import chardet
detected = chardet.detect(data) detected = chardet.detect(data)
encoding = detected["encoding"] encoding = detected["encoding"]