Merge branch 'pr1573' into api_2

This commit is contained in:
Xi Yan 2025-03-12 23:36:03 -07:00
commit f90dcd2a69
3 changed files with 31 additions and 20 deletions

View file

@ -6811,10 +6811,10 @@
"type": "string", "type": "string",
"enum": [ "enum": [
"post-training/messages", "post-training/messages",
"eval/question-answer" "eval/messages-answer"
], ],
"title": "DatasetPurpose", "title": "DatasetPurpose",
"description": "Purpose of the dataset. Each type has a different column format." "description": "Purpose of the dataset. Each purpose has a required input data schema."
}, },
"source": { "source": {
"$ref": "#/components/schemas/DataSource" "$ref": "#/components/schemas/DataSource"
@ -9885,13 +9885,13 @@
"type": "string", "type": "string",
"enum": [ "enum": [
"post-training/messages", "post-training/messages",
"eval/question-answer" "eval/messages-answer"
], ],
"description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question and answer column." "description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column."
}, },
"source": { "source": {
"$ref": "#/components/schemas/DataSource", "$ref": "#/components/schemas/DataSource",
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",

View file

@ -4716,10 +4716,10 @@ components:
type: string type: string
enum: enum:
- post-training/messages - post-training/messages
- eval/question-answer - eval/messages-answer
title: DatasetPurpose title: DatasetPurpose
description: >- description: >-
Purpose of the dataset. Each type has a different column format. Purpose of the dataset. Each purpose has a required input data schema.
source: source:
$ref: '#/components/schemas/DataSource' $ref: '#/components/schemas/DataSource'
metadata: metadata:
@ -6776,20 +6776,21 @@ components:
type: string type: string
enum: enum:
- post-training/messages - post-training/messages
- eval/question-answer - eval/messages-answer
description: >- description: >-
The purpose of the dataset. One of - "post-training/messages": The dataset The purpose of the dataset. One of - "post-training/messages": The dataset
contains a messages column with list of messages for post-training. - contains a messages column with list of messages for post-training. -
"eval/question-answer": The dataset contains a question and answer column. "eval/messages-answer": The dataset contains a messages column with list
of messages and an answer column.
source: source:
$ref: '#/components/schemas/DataSource' $ref: '#/components/schemas/DataSource'
description: >- description: >-
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
} - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface", } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
"dataset_path": "tatsu-lab/alpaca", "params": { "split": "train" } } - "huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split":
{ "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": "train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user",
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
} ] } world!"}, ] } ] }
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:

View file

@ -15,18 +15,26 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
class DatasetPurpose(Enum): class DatasetPurpose(Enum):
""" """
Purpose of the dataset. Each type has a different column format. Purpose of the dataset. Each purpose has a required input data schema.
:cvar post-training/messages: The dataset contains messages used for post-training. Examples:
:cvar post-training/messages: The dataset contains messages used for post-training.
{ {
"messages": [ "messages": [
{"role": "user", "content": "Hello, world!"}, {"role": "user", "content": "Hello, world!"},
{"role": "assistant", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"},
] ]
} }
:cvar eval/messages-answer: The dataset contains a messages column with a list of messages and an answer column.
{
"messages": [
{"role": "user", "content": "What is the capital of France?"},
],
"answer": "Paris"
}
""" """
post_training_messages = "post-training/messages" post_training_messages = "post-training/messages"
eval_question_answer = "eval/question-answer" eval_messages_answer = "eval/messages-answer"
# TODO: add more schemas here # TODO: add more schemas here
@ -145,7 +153,7 @@ class Datasets(Protocol):
:param purpose: The purpose of the dataset. One of :param purpose: The purpose of the dataset. One of
- "post-training/messages": The dataset contains a messages column with list of messages for post-training. - "post-training/messages": The dataset contains a messages column with list of messages for post-training.
- "eval/question-answer": The dataset contains a question and answer column. - "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column.
:param source: The data source of the dataset. Examples: :param source: The data source of the dataset. Examples:
- { - {
"type": "uri", "type": "uri",
@ -157,11 +165,13 @@ class Datasets(Protocol):
} }
- { - {
"type": "huggingface", "type": "huggingface",
"huggingface": {
"dataset_path": "tatsu-lab/alpaca", "dataset_path": "tatsu-lab/alpaca",
"params": { "params": {
"split": "train" "split": "train"
} }
} }
}
- { - {
"type": "rows", "type": "rows",
"rows": [ "rows": [