mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 04:50:39 +00:00
fix: Import chardet and pypdf only when actually needed
These dependencies are for inline::rag-runtime only. They are already defined for the provider as pip_dependencies.
Resolves: #1432
Signed-off-by: Ihar Hrachyshka <ihar.hrachyshka@gmail.com>
This commit is contained in:
parent
82e94fe22f
commit
fcb52fa3a4
1 changed file with 4 additions and 2 deletions
|
@@ -12,11 +12,9 @@ from dataclasses import dataclass
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
|
|
||||||
import chardet
|
|
||||||
import httpx
|
import httpx
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from numpy.typing import NDArray
|
from numpy.typing import NDArray
|
||||||
from pypdf import PdfReader
|
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import (
|
from llama_stack.apis.common.content_types import (
|
||||||
URL,
|
URL,
|
||||||
|
@@ -38,6 +36,8 @@ log = logging.getLogger(__name__)
|
||||||
def parse_pdf(data: bytes) -> str:
|
def parse_pdf(data: bytes) -> str:
|
||||||
# For PDF and DOC/DOCX files, we can't reliably convert to string
|
# For PDF and DOC/DOCX files, we can't reliably convert to string
|
||||||
pdf_bytes = io.BytesIO(data)
|
pdf_bytes = io.BytesIO(data)
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
pdf_reader = PdfReader(pdf_bytes)
|
pdf_reader = PdfReader(pdf_bytes)
|
||||||
return "\n".join([page.extract_text() for page in pdf_reader.pages])
|
return "\n".join([page.extract_text() for page in pdf_reader.pages])
|
||||||
|
|
||||||
|
@@ -75,6 +75,8 @@ def content_from_data(data_url: str) -> str:
|
||||||
|
|
||||||
encoding = parts["encoding"]
|
encoding = parts["encoding"]
|
||||||
if not encoding:
|
if not encoding:
|
||||||
|
import chardet
|
||||||
|
|
||||||
detected = chardet.detect(data)
|
detected = chardet.detect(data)
|
||||||
encoding = detected["encoding"]
|
encoding = detected["encoding"]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue