From 8a6fa41a936ccb655427ab3df9db056b299c65ee Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 12 Mar 2025 23:44:18 -0700 Subject: [PATCH] more purposes --- docs/_static/llama-stack-spec.html | 4 +++- docs/_static/llama-stack-spec.yaml | 7 +++++-- llama_stack/apis/datasets/datasets.py | 16 ++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 247a15af4..d6ee9334e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -6850,6 +6850,7 @@ "type": "string", "enum": [ "post-training/messages", + "eval/question-answer", "eval/messages-answer" ], "title": "DatasetPurpose", @@ -9442,9 +9443,10 @@ "type": "string", "enum": [ "post-training/messages", + "eval/question-answer", "eval/messages-answer" ], - "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column." + "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation." }, "source": { "$ref": "#/components/schemas/DataSource", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 97e0787ee..4a204f44d 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -4742,6 +4742,7 @@ components: type: string enum: - post-training/messages + - eval/question-answer - eval/messages-answer title: DatasetPurpose description: >- @@ -6394,12 +6395,14 @@ components: type: string enum: - post-training/messages + - eval/question-answer - eval/messages-answer description: >- The purpose of the dataset. One of - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - - "eval/messages-answer": The dataset contains a messages column with list - of messages and an answer column. + "eval/question-answer": The dataset contains a question column and an + answer column for evaluation. - "eval/messages-answer": The dataset contains + a messages column with list of messages and an answer column for evaluation. source: $ref: '#/components/schemas/DataSource' description: >- diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index 9ec05a213..711e3289c 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -24,16 +24,23 @@ class DatasetPurpose(Enum): {"role": "assistant", "content": "Hello, world!"}, ] } + :cvar eval/question-answer: The dataset contains a question column and an answer column. + { + "question": "What is the capital of France?", + "answer": "Paris" + } :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column. { "messages": [ - {"role": "user", "content": "What is the capital of France?"}, + {"role": "user", "content": "Hello, my name is John Doe."}, + {"role": "assistant", "content": "Hello, John Doe. How can I help you today?"}, + {"role": "user", "content": "What's my name?"}, ], - "answer": "Paris" + "answer": "John Doe" } """ - post_training_messages = "post-training/messages" + eval_question_answer = "eval/question-answer" eval_messages_answer = "eval/messages-answer" # TODO: add more schemas here @@ -153,7 +160,8 @@ class Datasets(Protocol): :param purpose: The purpose of the dataset. One of - "post-training/messages": The dataset contains a messages column with list of messages for post-training. - - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column. + - "eval/question-answer": The dataset contains a question column and an answer column for evaluation. + - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation. :param source: The data source of the dataset. Examples: - { "type": "uri",