diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index f7166bb65..fc213b719 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -6850,10 +6850,10 @@ "type": "string", "enum": [ "post-training/messages", - "eval/question-answer" + "eval/messages-answer" ], "title": "DatasetPurpose", - "description": "Purpose of the dataset. Each type has a different column format." + "description": "Purpose of the dataset. Each purpose has a required input data schema." }, "source": { "$ref": "#/components/schemas/DataSource" @@ -9442,9 +9442,9 @@ "type": "string", "enum": [ "post-training/messages", - "eval/question-answer" + "eval/messages-answer" ], - "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question and answer column." + "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"What is the capital of France?\"}, ], \"answer\": \"Paris\" }" }, "source": { "$ref": "#/components/schemas/DataSource", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 10db07f02..e3355fc78 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -4742,10 +4742,10 @@ components: type: string enum: - post-training/messages - - eval/question-answer + - eval/messages-answer title: DatasetPurpose description: >- - Purpose of the dataset. Each type has a different column format. + Purpose of the dataset. Each purpose has a required input data schema. source: $ref: '#/components/schemas/DataSource' metadata: @@ -6394,11 +6394,15 @@ components: type: string enum: - post-training/messages - - eval/question-answer + - eval/messages-answer description: >- The purpose of the dataset. One of - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - - "eval/question-answer": The dataset contains a question and answer column. + Example data rows: { "messages": [ {"role": "user", "content": "Hello, + world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column. - Example data rows: { "messages": [ {"role": "user", "content": + "What is the capital of France?"}, ], "answer": "Paris" } source: $ref: '#/components/schemas/DataSource' description: >- diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index cfbd6b4ac..a731da6ba 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -16,17 +16,17 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho class DatasetPurpose(Enum): """ Purpose of the dataset. Each purpose has a required input data schema. - - :cvar post-training/messages: The dataset contains messages used for post-training. Examples: + + :cvar post-training/messages: The dataset contains messages used for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - :cvar eval/question-answer: The dataset contains a question and answer column. + :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column. { - "question": [ + "messages": [ {"role": "user", "content": "What is the capital of France?"}, ], "answer": "Paris" @@ -34,7 +34,7 @@ class DatasetPurpose(Enum): """ post_training_messages = "post-training/messages" - eval_question_answer = "eval/question-answer" + eval_messages_answer = "eval/messages-answer" # TODO: add more schemas here @@ -153,7 +153,21 @@ class Datasets(Protocol): :param purpose: The purpose of the dataset. One of - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - - "eval/question-answer": The dataset contains a question and answer column. + - Example data rows: + { + "messages": [ + {"role": "user", "content": "Hello, world!"}, + {"role": "assistant", "content": "Hello, world!"}, + ] + } + - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column. + - Example data rows: + { + "messages": [ + {"role": "user", "content": "What is the capital of France?"}, + ], + "answer": "Paris" + } :param source: The data source of the dataset. Examples: - { "type": "uri",