This commit is contained in:
Xi Yan 2025-03-12 18:46:40 -07:00
parent 18de4cd08a
commit a3173e8284
3 changed files with 95 additions and 41 deletions

View file

@ -4738,13 +4738,14 @@ components:
type: string
const: dataset
default: dataset
schema:
purpose:
type: string
enum:
- messages
title: Schema
- post-training/messages
- eval/question-answer
title: DatasetPurpose
description: >-
Schema of the dataset. Each type has a different column format.
Purpose of the dataset. Each type has a different column format.
data_source:
$ref: '#/components/schemas/DataSource'
metadata:
@ -4763,7 +4764,7 @@ components:
- provider_resource_id
- provider_id
- type
- schema
- purpose
- data_source
- metadata
title: Dataset
@ -4774,8 +4775,10 @@ components:
type: string
const: huggingface
default: huggingface
dataset_path:
path:
type: string
description: >-
The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa"
params:
type: object
additionalProperties:
@ -4786,12 +4789,14 @@ components:
- type: string
- type: array
- type: object
description: The parameters for the dataset.
additionalProperties: false
required:
- type
- dataset_path
- path
- params
title: HuggingfaceDataSource
description: A dataset stored in Huggingface.
RowsDataSource:
type: object
properties:
@ -4811,11 +4816,16 @@ components:
- type: string
- type: array
- type: object
description: >-
The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
world!"}]} ]
additionalProperties: false
required:
- type
- rows
title: RowsDataSource
description: A dataset stored in rows.
URIDataSource:
type: object
properties:
@ -4825,11 +4835,16 @@ components:
default: uri
uri:
type: string
description: >-
The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
- "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
additionalProperties: false
required:
- type
- uri
title: URIDataSource
description: >-
A dataset that can be obtained from a URI.
Model:
type: object
properties:
@ -6367,14 +6382,16 @@ components:
RegisterDatasetRequest:
type: object
properties:
schema:
purpose:
type: string
enum:
- messages
- post-training/messages
- eval/question-answer
description: >-
The schema format of the dataset. One of - messages: The dataset contains
a messages column with list of messages for post-training.
data_source:
The purpose of the dataset. One of - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. -
"eval/question-answer": The dataset contains a question and answer column.
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
@ -6401,8 +6418,8 @@ components:
The ID of the dataset. If not provided, a random ID will be generated.
additionalProperties: false
required:
- schema
- data_source
- purpose
- source
title: RegisterDatasetRequest
RegisterModelRequest:
type: object