forked from phoenix-oss/llama-stack-mirror
		
	feat(api): (1/n) datasets api clean up (#1573)
## PR Stack
- https://github.com/meta-llama/llama-stack/pull/1573
- https://github.com/meta-llama/llama-stack/pull/1625
- https://github.com/meta-llama/llama-stack/pull/1656
- https://github.com/meta-llama/llama-stack/pull/1657
- https://github.com/meta-llama/llama-stack/pull/1658
- https://github.com/meta-llama/llama-stack/pull/1659
- https://github.com/meta-llama/llama-stack/pull/1660

**Client SDK**
- https://github.com/meta-llama/llama-stack-client-python/pull/203

**CI**
- 1391130488

<img width="1042" alt="image" src="https://github.com/user-attachments/assets/69636067-376d-436b-9204-896e2dd490ca" />

-- `test_rag_agent_with_attachments` is flaky and not related to this PR.

## Doc
<img width="789" alt="image" src="https://github.com/user-attachments/assets/b88390f3-73d6-4483-b09a-a192064e32d9" />

## Client Usage
```python
client.datasets.register(
    source={
        "type": "uri",
        "uri": "lsfs://mydata.jsonl",
    },
    schema="jsonl_messages",
    # optional
    dataset_id="my_first_train_data"
)

# quick prototype debugging
client.datasets.register(
    data_reference={
        "type": "rows",
        "rows": [
            {"messages": [...]},
        ],
    },
    schema="jsonl_messages",
)
```

## Test Plan
- CI: 1387805545

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py
```

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/scoring/test_scoring.py
```

```
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
```
This commit is contained in:
		
							parent
							
								
									3b35a39b8b
								
							
						
					
					
						commit
						5287b437ae
					
				
					 29 changed files with 2593 additions and 2296 deletions
				
			
		|  | @ -10,18 +10,17 @@ from urllib.parse import unquote | |||
| 
 | ||||
| import pandas | ||||
| 
 | ||||
| from llama_stack.apis.common.content_types import URL | ||||
| from llama_stack.providers.utils.memory.vector_store import parse_data_url | ||||
| 
 | ||||
| 
 | ||||
| def get_dataframe_from_url(url: URL): | ||||
| def get_dataframe_from_uri(uri: str): | ||||
|     df = None | ||||
|     if url.uri.endswith(".csv"): | ||||
|         df = pandas.read_csv(url.uri) | ||||
|     elif url.uri.endswith(".xlsx"): | ||||
|         df = pandas.read_excel(url.uri) | ||||
|     elif url.uri.startswith("data:"): | ||||
|         parts = parse_data_url(url.uri) | ||||
|     if uri.endswith(".csv"): | ||||
|         df = pandas.read_csv(uri) | ||||
|     elif uri.endswith(".xlsx"): | ||||
|         df = pandas.read_excel(uri) | ||||
|     elif uri.startswith("data:"): | ||||
|         parts = parse_data_url(uri) | ||||
|         data = parts["data"] | ||||
|         if parts["is_base64"]: | ||||
|             data = base64.b64decode(data) | ||||
|  | @ -39,6 +38,6 @@ def get_dataframe_from_url(url: URL): | |||
|         else: | ||||
|             df = pandas.read_excel(data_bytes) | ||||
|     else: | ||||
|         raise ValueError(f"Unsupported file type: {url}") | ||||
|         raise ValueError(f"Unsupported file type: {uri}") | ||||
| 
 | ||||
|     return df | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue