fix

2025-12-17 14:12:37 +00:00 · 2024-11-07 18:25:39 -08:00 · 2024-11-07 18:25:39 -08:00 · 6ee02ca23b
commit 6ee02ca23b
parent 33b6d9b7b7
6 changed files with 100 additions and 87 deletions
--- a/llama_stack/providers/utils/memory/file_utils.py
+++ b/llama_stack/providers/utils/memory/file_utils.py
@@ -5,41 +5,22 @@
 # the root directory of this source tree.

 import base64
-import io
-from urllib.parse import unquote
-
-import pandas
+import mimetypes
+import os

 from llama_models.llama3.api.datatypes import URL

-from llama_stack.providers.utils.memory.vector_store import parse_data_url

+def data_url_from_file(file_path: str) -> URL:
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")

-def get_dataframe_from_url(url: URL):
-    df = None
-    if url.uri.endswith(".csv"):
-        df = pandas.read_csv(url.uri)
-    elif url.uri.endswith(".xlsx"):
-        df = pandas.read_excel(url.uri)
-    elif url.uri.startswith("data:"):
-        parts = parse_data_url(url.uri)
-        data = parts["data"]
-        if parts["is_base64"]:
-            data = base64.b64decode(data)
-        else:
-            data = unquote(data)
-            encoding = parts["encoding"] or "utf-8"
-            data = data.encode(encoding)
+    with open(file_path, "rb") as file:
+        file_content = file.read()

-        mime_type = parts["mimetype"]
-        mime_category = mime_type.split("/")[0]
-        data_bytes = io.BytesIO(data)
+    base64_content = base64.b64encode(file_content).decode("utf-8")
+    mime_type, _ = mimetypes.guess_type(file_path)

-        if mime_category == "text":
-            df = pandas.read_csv(data_bytes)
-        else:
-            df = pandas.read_excel(data_bytes)
-    else:
-        raise ValueError(f"Unsupported file type: {url}")
+    data_url = f"data:{mime_type};base64,{base64_content}"

-    return df
+    return URL(uri=data_url)