feat(dataset api): (1.5/n) fix dataset registeration (#1659)

# What does this PR do?

- fix dataset registeration & iterrows
> NOTE: the URL endpoint is changed to datasetio due to flaky path
routing

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py
```
<img width="854" alt="image"
src="https://github.com/user-attachments/assets/0168b352-1c5a-48d1-8e9a-93141d418e54"
/>


[//]: # (## Documentation)
This commit is contained in:
Xi Yan 2025-03-15 16:48:09 -07:00 committed by GitHub
parent 2c9d624910
commit a568bf3f9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 159 additions and 248 deletions

View file

@ -10,18 +10,17 @@ from urllib.parse import unquote
import pandas
from llama_stack.apis.common.content_types import URL
from llama_stack.providers.utils.memory.vector_store import parse_data_url
def get_dataframe_from_url(url: URL):
def get_dataframe_from_uri(uri: str):
df = None
if url.uri.endswith(".csv"):
df = pandas.read_csv(url.uri)
elif url.uri.endswith(".xlsx"):
df = pandas.read_excel(url.uri)
elif url.uri.startswith("data:"):
parts = parse_data_url(url.uri)
if uri.endswith(".csv"):
df = pandas.read_csv(uri)
elif uri.endswith(".xlsx"):
df = pandas.read_excel(uri)
elif uri.startswith("data:"):
parts = parse_data_url(uri)
data = parts["data"]
if parts["is_base64"]:
data = base64.b64decode(data)
@ -39,6 +38,6 @@ def get_dataframe_from_url(url: URL):
else:
df = pandas.read_excel(data_bytes)
else:
raise ValueError(f"Unsupported file type: {url}")
raise ValueError(f"Unsupported file type: {uri}")
return df