fix: Import chardet and pypdf only when actually needed

These dependencies are for inline::rag-runtime only. They are already defined for the provider as pip_dependencies.

Resolves: #1432
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
2025-08-12 04:50:39 +00:00 · 2025-03-06 10:25:24 -05:00 · 2025-03-06 10:25:24 -05:00 · fcb52fa3a4
commit fcb52fa3a4
parent 82e94fe22f
1 changed files with 4 additions and 2 deletions
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@ -12,11 +12,9 @@ from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 from urllib.parse import unquote
 import chardet
 import httpx
 import numpy as np
 from numpy.typing import NDArray
 from pypdf import PdfReader
 from llama_stack.apis.common.content_types import (
    URL,
@ -38,6 +36,8 @@ log = logging.getLogger(__name__)
 def parse_pdf(data: bytes) -> str:
    """Return the text of every page in a PDF, with pages joined by newlines.

    :param data: raw bytes of the PDF document; wrapped in an in-memory
        stream and handed to ``pypdf.PdfReader``
    :returns: the result of ``extract_text()`` for each page, joined with
        a newline character between pages
    """
    # For PDF and DOC/DOCX files, we can't reliably convert to string
    pdf_bytes = io.BytesIO(data)
    # Imported here rather than at module level so that pypdf is only
    # required when a PDF is actually parsed.
    from pypdf import PdfReader
    pdf_reader = PdfReader(pdf_bytes)
    return "\n".join([page.extract_text() for page in pdf_reader.pages])
@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str:
    encoding = parts["encoding"]
    if not encoding:
        import chardet
        detected = chardet.detect(data)
        encoding = detected["encoding"]