This commit is contained in:
Xi Yan 2024-11-07 18:25:39 -08:00
parent 33b6d9b7b7
commit 6ee02ca23b
6 changed files with 100 additions and 87 deletions

View file

@ -5,41 +5,22 @@
# the root directory of this source tree.
import base64
import io
from urllib.parse import unquote
import pandas
import mimetypes
import os
from llama_models.llama3.api.datatypes import URL
from llama_stack.providers.utils.memory.vector_store import parse_data_url
def data_url_from_file(file_path: str) -> URL:
    """Encode the contents of a local file as a base64 ``data:`` URL.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``URL`` whose ``uri`` is ``data:<mime type>;base64,<payload>``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, "rb") as file:
        file_content = file.read()

    base64_content = base64.b64encode(file_content).decode("utf-8")

    # guess_type() yields None for unknown extensions; without a fallback the
    # URL would contain the literal text "None" as its media type.
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "application/octet-stream"

    return URL(uri=f"data:{mime_type};base64,{base64_content}")


def get_dataframe_from_url(url: URL):
    """Load the tabular data referenced by ``url`` into a pandas DataFrame.

    Supported inputs are ``data:`` URLs and locations ending in ``.csv`` or
    ``.xlsx`` (matched case-insensitively). For ``data:`` URLs a ``text/*``
    media type is parsed as CSV and anything else as an Excel workbook.

    Args:
        url: Object exposing the location as ``url.uri``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the location is neither a ``data:`` URL nor a
            ``.csv`` / ``.xlsx`` resource.
    """
    uri = url.uri

    # Check the scheme before the suffix: an unencoded data URL may carry a
    # payload that itself ends in ".csv" and must not be treated as a path.
    if uri.startswith("data:"):
        # parse_data_url is used here as returning a mapping with the keys
        # "data", "is_base64", "encoding" and "mimetype".
        parts = parse_data_url(uri)
        data = parts["data"]
        if parts["is_base64"]:
            data = base64.b64decode(data)
        else:
            encoding = parts["encoding"] or "utf-8"
            data = unquote(data).encode(encoding)

        mime_category = parts["mimetype"].split("/")[0]
        data_bytes = io.BytesIO(data)
        if mime_category == "text":
            return pandas.read_csv(data_bytes)
        return pandas.read_excel(data_bytes)

    # Compare on a lowered copy so "FILE.CSV" is accepted, but hand pandas the
    # untouched location.
    lowered = uri.lower()
    if lowered.endswith(".csv"):
        return pandas.read_csv(uri)
    if lowered.endswith(".xlsx"):
        return pandas.read_excel(uri)

    raise ValueError(f"Unsupported file type: {url}")