forked from phoenix-oss/llama-stack-mirror
feat(dataset api): (1.5/n) fix dataset registeration (#1659)
# What does this PR do? - fix dataset registeration & iterrows > NOTE: the URL endpoint is changed to datasetio due to flaky path routing [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py ``` <img width="854" alt="image" src="https://github.com/user-attachments/assets/0168b352-1c5a-48d1-8e9a-93141d418e54" /> [//]: # (## Documentation)
This commit is contained in:
parent
2c9d624910
commit
a568bf3f9d
13 changed files with 159 additions and 248 deletions
|
@ -10,18 +10,17 @@ from urllib.parse import unquote
|
|||
|
||||
import pandas
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
||||
|
||||
|
||||
def get_dataframe_from_url(url: URL):
|
||||
def get_dataframe_from_uri(uri: str):
|
||||
df = None
|
||||
if url.uri.endswith(".csv"):
|
||||
df = pandas.read_csv(url.uri)
|
||||
elif url.uri.endswith(".xlsx"):
|
||||
df = pandas.read_excel(url.uri)
|
||||
elif url.uri.startswith("data:"):
|
||||
parts = parse_data_url(url.uri)
|
||||
if uri.endswith(".csv"):
|
||||
df = pandas.read_csv(uri)
|
||||
elif uri.endswith(".xlsx"):
|
||||
df = pandas.read_excel(uri)
|
||||
elif uri.startswith("data:"):
|
||||
parts = parse_data_url(uri)
|
||||
data = parts["data"]
|
||||
if parts["is_base64"]:
|
||||
data = base64.b64decode(data)
|
||||
|
@ -39,6 +38,6 @@ def get_dataframe_from_url(url: URL):
|
|||
else:
|
||||
df = pandas.read_excel(data_bytes)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {url}")
|
||||
raise ValueError(f"Unsupported file type: {uri}")
|
||||
|
||||
return df
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue