From 4f6f0f6a9101619e86d90e708f00ef86c8588283 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 12 Mar 2025 23:27:01 -0700
Subject: [PATCH 1/4] docs: nest huggingface source example, document eval/question-answer

---
 docs/_static/llama-stack-spec.html    |  2 +-
 docs/_static/llama-stack-spec.yaml    |  8 ++++----
 llama_stack/apis/datasets/datasets.py | 15 ++++++++++++---
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 44459f2b9..f7166bb65 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -9448,7 +9448,7 @@
                     },
                     "source": {
                         "$ref": "#/components/schemas/DataSource",
-                        "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
+                        "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
                     },
                     "metadata": {
                         "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a257b2a7d..10db07f02 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -6404,10 +6404,10 @@ components:
           description: >-
             The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
             } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
-            "dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } -
-            { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
+            "huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split":
+            "train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user",
+            "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
+            world!"}, ] } ] }
         metadata:
           type: object
           additionalProperties:
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index 20587a29e..71118667f 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -23,6 +23,13 @@ class DatasetPurpose(Enum):
                 {"role": "assistant", "content": "Hello, world!"},
             ]
         }
+    :cvar eval/question-answer: The dataset contains a question and answer column.
+        {
+            "question": [
+                {"role": "user", "content": "What is the capital of France?"},
+            ],
+            "answer": "Paris"
+        }
     """
 
     post_training_messages = "post-training/messages"
@@ -157,9 +164,11 @@ class Datasets(Protocol):
            }
            - {
                "type": "huggingface",
-               "dataset_path": "tatsu-lab/alpaca",
-               "params": {
-                   "split": "train"
+               "huggingface": {
+                   "dataset_path": "tatsu-lab/alpaca",
+                   "params": {
+                       "split": "train"
+                   }
                }
            }
            - {

From 772339bebfe32b18ff6549a36fd2b925fef9d572 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 12 Mar 2025 23:27:45 -0700
Subject: [PATCH 2/4] docs: reword DatasetPurpose docstring summary

---
 llama_stack/apis/datasets/datasets.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index 71118667f..cfbd6b4ac 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -15,7 +15,8 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 
 class DatasetPurpose(Enum):
     """
-    Purpose of the dataset. Each type has a different column format.
+    Purpose of the dataset. Each purpose has a required input data schema.
+    
     :cvar post-training/messages: The dataset contains messages used for post-training. Examples:
         {
             "messages": [

From b4d118fc5c58c335073e0ed633d84744878c3d58 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 12 Mar 2025 23:30:47 -0700
Subject: [PATCH 3/4] rename eval/question-answer purpose to eval/messages-answer

---
 docs/_static/llama-stack-spec.html    |  8 ++++----
 docs/_static/llama-stack-spec.yaml    | 12 ++++++++----
 llama_stack/apis/datasets/datasets.py | 26 ++++++++++++++++++++------
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index f7166bb65..fc213b719 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6850,10 +6850,10 @@
                         "type": "string",
                         "enum": [
                             "post-training/messages",
-                            "eval/question-answer"
+                            "eval/messages-answer"
                         ],
                         "title": "DatasetPurpose",
-                        "description": "Purpose of the dataset. Each type has a different column format."
+                        "description": "Purpose of the dataset. Each purpose has a required input data schema."
                     },
                     "source": {
                         "$ref": "#/components/schemas/DataSource"
@@ -9442,9 +9442,9 @@
                         "type": "string",
                         "enum": [
                             "post-training/messages",
-                            "eval/question-answer"
+                            "eval/messages-answer"
                         ],
-                        "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question and answer column."
+                        "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"What is the capital of France?\"}, ], \"answer\": \"Paris\" }"
                     },
                     "source": {
                         "$ref": "#/components/schemas/DataSource",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 10db07f02..e3355fc78 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4742,10 +4742,10 @@ components:
           type: string
           enum:
             - post-training/messages
-            - eval/question-answer
+            - eval/messages-answer
           title: DatasetPurpose
           description: >-
-            Purpose of the dataset. Each type has a different column format.
+            Purpose of the dataset. Each purpose has a required input data schema.
         source:
           $ref: '#/components/schemas/DataSource'
         metadata:
@@ -6394,11 +6394,15 @@ components:
           type: string
           enum:
             - post-training/messages
-            - eval/question-answer
+            - eval/messages-answer
           description: >-
             The purpose of the dataset. One of - "post-training/messages": The dataset
             contains a messages column with list of messages for post-training. -
-            "eval/question-answer": The dataset contains a question and answer column.
+            Example data rows: { "messages": [ {"role": "user", "content": "Hello,
+            world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/messages-answer":
+            The dataset contains a messages column with list of messages and an answer
+            column. - Example data rows: { "messages": [ {"role": "user", "content":
+            "What is the capital of France?"}, ], "answer": "Paris" }
         source:
           $ref: '#/components/schemas/DataSource'
           description: >-
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index cfbd6b4ac..a731da6ba 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -16,17 +16,17 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 class DatasetPurpose(Enum):
     """
     Purpose of the dataset. Each purpose has a required input data schema.
-    
-    :cvar post-training/messages: The dataset contains messages used for post-training. Examples:
+
+    :cvar post-training/messages: The dataset contains messages used for post-training.
         {
             "messages": [
                 {"role": "user", "content": "Hello, world!"},
                 {"role": "assistant", "content": "Hello, world!"},
             ]
         }
-    :cvar eval/question-answer: The dataset contains a question and answer column.
+    :cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
         {
-            "question": [
+            "messages": [
                 {"role": "user", "content": "What is the capital of France?"},
             ],
             "answer": "Paris"
@@ -34,7 +34,7 @@ class DatasetPurpose(Enum):
     """
 
     post_training_messages = "post-training/messages"
-    eval_question_answer = "eval/question-answer"
+    eval_messages_answer = "eval/messages-answer"
 
     # TODO: add more schemas here
 
@@ -153,7 +153,21 @@ class Datasets(Protocol):
 
         :param purpose: The purpose of the dataset. One of
             - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
-            - "eval/question-answer": The dataset contains a question and answer column.
+                - Example data rows:
+                    {
+                        "messages": [
+                            {"role": "user", "content": "Hello, world!"},
+                            {"role": "assistant", "content": "Hello, world!"},
+                        ]
+                    }
+            - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column.
+                - Example data rows:
+                    {
+                        "messages": [
+                            {"role": "user", "content": "What is the capital of France?"},
+                        ],
+                        "answer": "Paris"
+                    }
         :param source: The data source of the dataset. Examples:
            - {
                "type": "uri",

From 0df33049e3cef90f0e49410890926dd4d4a1107b Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Wed, 12 Mar 2025 23:32:54 -0700
Subject: [PATCH 4/4] docs: drop example rows from Datasets purpose param description

---
 docs/_static/llama-stack-spec.html    |  2 +-
 docs/_static/llama-stack-spec.yaml    |  7 ++-----
 llama_stack/apis/datasets/datasets.py | 14 --------------
 3 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index fc213b719..247a15af4 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -9444,7 +9444,7 @@
                             "post-training/messages",
                             "eval/messages-answer"
                         ],
-                        "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column. - Example data rows: { \"messages\": [ {\"role\": \"user\", \"content\": \"What is the capital of France?\"}, ], \"answer\": \"Paris\" }"
+                        "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column."
                     },
                     "source": {
                         "$ref": "#/components/schemas/DataSource",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index e3355fc78..97e0787ee 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -6398,11 +6398,8 @@ components:
           description: >-
             The purpose of the dataset. One of - "post-training/messages": The dataset
             contains a messages column with list of messages for post-training. -
-            Example data rows: { "messages": [ {"role": "user", "content": "Hello,
-            world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column. - Example data rows: { "messages": [ {"role": "user", "content":
-            "What is the capital of France?"}, ], "answer": "Paris" }
+            "eval/messages-answer": The dataset contains a messages column with list
+            of messages and an answer column.
         source:
           $ref: '#/components/schemas/DataSource'
           description: >-
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index a731da6ba..9ec05a213 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -153,21 +153,7 @@ class Datasets(Protocol):
 
         :param purpose: The purpose of the dataset. One of
             - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
-                - Example data rows:
-                    {
-                        "messages": [
-                            {"role": "user", "content": "Hello, world!"},
-                            {"role": "assistant", "content": "Hello, world!"},
-                        ]
-                    }
             - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column.
-                - Example data rows:
-                    {
-                        "messages": [
-                            {"role": "user", "content": "What is the capital of France?"},
-                        ],
-                        "answer": "Paris"
-                    }
         :param source: The data source of the dataset. Examples:
            - {
                "type": "uri",