Add more dataset purposes (eval/question-answer)

This commit is contained in:
Xi Yan 2025-03-12 23:44:18 -07:00
parent 0df33049e3
commit 8a6fa41a93
3 changed files with 20 additions and 7 deletions

View file

@ -6850,6 +6850,7 @@
"type": "string", "type": "string",
"enum": [ "enum": [
"post-training/messages", "post-training/messages",
"eval/question-answer",
"eval/messages-answer" "eval/messages-answer"
], ],
"title": "DatasetPurpose", "title": "DatasetPurpose",
@ -9442,9 +9443,10 @@
"type": "string", "type": "string",
"enum": [ "enum": [
"post-training/messages", "post-training/messages",
"eval/question-answer",
"eval/messages-answer" "eval/messages-answer"
], ],
"description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column." "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation."
}, },
"source": { "source": {
"$ref": "#/components/schemas/DataSource", "$ref": "#/components/schemas/DataSource",

View file

@ -4742,6 +4742,7 @@ components:
type: string type: string
enum: enum:
- post-training/messages - post-training/messages
- eval/question-answer
- eval/messages-answer - eval/messages-answer
title: DatasetPurpose title: DatasetPurpose
description: >- description: >-
@ -6394,12 +6395,14 @@ components:
type: string type: string
enum: enum:
- post-training/messages - post-training/messages
- eval/question-answer
- eval/messages-answer - eval/messages-answer
description: >- description: >-
The purpose of the dataset. One of - "post-training/messages": The dataset The purpose of the dataset. One of - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. - contains a messages column with list of messages for post-training. -
"eval/messages-answer": The dataset contains a messages column with list "eval/question-answer": The dataset contains a question column and an
of messages and an answer column. answer column for evaluation. - "eval/messages-answer": The dataset contains
a messages column with list of messages and an answer column for evaluation.
source: source:
$ref: '#/components/schemas/DataSource' $ref: '#/components/schemas/DataSource'
description: >- description: >-

View file

@ -24,16 +24,23 @@ class DatasetPurpose(Enum):
{"role": "assistant", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"},
] ]
} }
:cvar eval/question-answer: The dataset contains a question column and an answer column.
{
"question": "What is the capital of France?",
"answer": "Paris"
}
:cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column. :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
{ {
"messages": [ "messages": [
{"role": "user", "content": "What is the capital of France?"}, {"role": "user", "content": "Hello, my name is John Doe."},
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
{"role": "user", "content": "What's my name?"},
], ],
"answer": "Paris" "answer": "John Doe"
} }
""" """
post_training_messages = "post-training/messages" post_training_messages = "post-training/messages"
eval_question_answer = "eval/question-answer"
eval_messages_answer = "eval/messages-answer" eval_messages_answer = "eval/messages-answer"
# TODO: add more schemas here # TODO: add more schemas here
@ -153,7 +160,8 @@ class Datasets(Protocol):
:param purpose: The purpose of the dataset. One of :param purpose: The purpose of the dataset. One of
- "post-training/messages": The dataset contains a messages column with list of messages for post-training. - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column. - "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
:param source: The data source of the dataset. Examples: :param source: The data source of the dataset. Examples:
- { - {
"type": "uri", "type": "uri",