From fcb52fa3a454b4fcb063c692a22e8841296d7a3b Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 6 Mar 2025 10:25:24 -0500 Subject: [PATCH] fix: Import chardet and pypdf only when actually needed These dependencies are for inline::rag-runtime only. They are already defined for the provider as pip_dependencies. Resolves: #1432 Signed-off-by: Ihar Hrachyshka --- llama_stack/providers/utils/memory/vector_store.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 88ad9a989..ba4403ea1 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -12,11 +12,9 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional from urllib.parse import unquote -import chardet import httpx import numpy as np from numpy.typing import NDArray -from pypdf import PdfReader from llama_stack.apis.common.content_types import ( URL, @@ -38,6 +36,8 @@ log = logging.getLogger(__name__) def parse_pdf(data: bytes) -> str: # For PDF and DOC/DOCX files, we can't reliably convert to string pdf_bytes = io.BytesIO(data) + from pypdf import PdfReader + pdf_reader = PdfReader(pdf_bytes) return "\n".join([page.extract_text() for page in pdf_reader.pages]) @@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str: encoding = parts["encoding"] if not encoding: + import chardet + detected = chardet.detect(data) encoding = detected["encoding"]