fix(deps): move chardet and pypdf imports inline where used (#1434)

# What does this PR do?

Fix import errors raised when `chardet` and `pypdf` are not installed but
are still imported at module level in `url_utils.py`.

Closes #1432

## Test Plan

Now able to run the server with the config.

[//]: # (## Documentation)

Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
Ihar Hrachyshka 2025-03-06 20:09:14 -05:00 committed by GitHub
parent 803bf0e029
commit 8234cdf1a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -12,11 +12,9 @@ from dataclasses import dataclass
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from urllib.parse import unquote from urllib.parse import unquote
import chardet
import httpx import httpx
import numpy as np import numpy as np
from numpy.typing import NDArray from numpy.typing import NDArray
from pypdf import PdfReader
from llama_stack.apis.common.content_types import ( from llama_stack.apis.common.content_types import (
URL, URL,
@@ -38,6 +36,8 @@ log = logging.getLogger(__name__)
def parse_pdf(data: bytes) -> str: def parse_pdf(data: bytes) -> str:
# For PDF and DOC/DOCX files, we can't reliably convert to string # For PDF and DOC/DOCX files, we can't reliably convert to string
pdf_bytes = io.BytesIO(data) pdf_bytes = io.BytesIO(data)
from pypdf import PdfReader
pdf_reader = PdfReader(pdf_bytes) pdf_reader = PdfReader(pdf_bytes)
return "\n".join([page.extract_text() for page in pdf_reader.pages]) return "\n".join([page.extract_text() for page in pdf_reader.pages])
@@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str:
encoding = parts["encoding"] encoding = parts["encoding"]
if not encoding: if not encoding:
import chardet
detected = chardet.detect(data) detected = chardet.detect(data)
encoding = detected["encoding"] encoding = detected["encoding"]