From 8234cdf1a58322488c8a8b0ea5ccf00725fcad24 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 6 Mar 2025 20:09:14 -0500 Subject: [PATCH] fix(deps): move chardet and pypdf imports inline where used (#1434) # What does this PR do? Fix import errors due to `chardet` and `pypdf` not being installed while imported from `url_utils.py`. Closes #1432 ## Test Plan Now able to run the server with the config. [//]: # (## Documentation) Signed-off-by: Ihar Hrachyshka --- llama_stack/providers/utils/memory/vector_store.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 88ad9a989..ba4403ea1 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -12,11 +12,9 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional from urllib.parse import unquote -import chardet import httpx import numpy as np from numpy.typing import NDArray -from pypdf import PdfReader from llama_stack.apis.common.content_types import ( URL, @@ -38,6 +36,8 @@ log = logging.getLogger(__name__) def parse_pdf(data: bytes) -> str: # For PDF and DOC/DOCX files, we can't reliably convert to string pdf_bytes = io.BytesIO(data) + from pypdf import PdfReader + pdf_reader = PdfReader(pdf_bytes) return "\n".join([page.extract_text() for page in pdf_reader.pages]) @@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str: encoding = parts["encoding"] if not encoding: + import chardet + detected = chardet.detect(data) encoding = detected["encoding"]