forked from phoenix-oss/llama-stack-mirror
update
This commit is contained in:
parent
18de4cd08a
commit
a3173e8284
3 changed files with 95 additions and 41 deletions
49
docs/_static/llama-stack-spec.html
vendored
49
docs/_static/llama-stack-spec.html
vendored
|
@ -6846,13 +6846,14 @@
|
||||||
"const": "dataset",
|
"const": "dataset",
|
||||||
"default": "dataset"
|
"default": "dataset"
|
||||||
},
|
},
|
||||||
"schema": {
|
"purpose": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
"messages"
|
"post-training/messages",
|
||||||
|
"eval/question-answer"
|
||||||
],
|
],
|
||||||
"title": "Schema",
|
"title": "DatasetPurpose",
|
||||||
"description": "Schema of the dataset. Each type has a different column format."
|
"description": "Purpose of the dataset. Each type has a different column format."
|
||||||
},
|
},
|
||||||
"data_source": {
|
"data_source": {
|
||||||
"$ref": "#/components/schemas/DataSource"
|
"$ref": "#/components/schemas/DataSource"
|
||||||
|
@ -6889,7 +6890,7 @@
|
||||||
"provider_resource_id",
|
"provider_resource_id",
|
||||||
"provider_id",
|
"provider_id",
|
||||||
"type",
|
"type",
|
||||||
"schema",
|
"purpose",
|
||||||
"data_source",
|
"data_source",
|
||||||
"metadata"
|
"metadata"
|
||||||
],
|
],
|
||||||
|
@ -6903,8 +6904,9 @@
|
||||||
"const": "huggingface",
|
"const": "huggingface",
|
||||||
"default": "huggingface"
|
"default": "huggingface"
|
||||||
},
|
},
|
||||||
"dataset_path": {
|
"path": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The path to the dataset in Huggingface. E.g. - \"llamastack/simpleqa\""
|
||||||
},
|
},
|
||||||
"params": {
|
"params": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6929,16 +6931,18 @@
|
||||||
"type": "object"
|
"type": "object"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
},
|
||||||
|
"description": "The parameters for the dataset."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type",
|
||||||
"dataset_path",
|
"path",
|
||||||
"params"
|
"params"
|
||||||
],
|
],
|
||||||
"title": "HuggingfaceDataSource"
|
"title": "HuggingfaceDataSource",
|
||||||
|
"description": "A dataset stored in Huggingface."
|
||||||
},
|
},
|
||||||
"RowsDataSource": {
|
"RowsDataSource": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6974,7 +6978,8 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
"description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -6982,7 +6987,8 @@
|
||||||
"type",
|
"type",
|
||||||
"rows"
|
"rows"
|
||||||
],
|
],
|
||||||
"title": "RowsDataSource"
|
"title": "RowsDataSource",
|
||||||
|
"description": "A dataset stored in rows."
|
||||||
},
|
},
|
||||||
"URIDataSource": {
|
"URIDataSource": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -6993,7 +6999,8 @@
|
||||||
"default": "uri"
|
"default": "uri"
|
||||||
},
|
},
|
||||||
"uri": {
|
"uri": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -7001,7 +7008,8 @@
|
||||||
"type",
|
"type",
|
||||||
"uri"
|
"uri"
|
||||||
],
|
],
|
||||||
"title": "URIDataSource"
|
"title": "URIDataSource",
|
||||||
|
"description": "A dataset that can be obtained from a URI."
|
||||||
},
|
},
|
||||||
"Model": {
|
"Model": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9419,14 +9427,15 @@
|
||||||
"RegisterDatasetRequest": {
|
"RegisterDatasetRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"schema": {
|
"purpose": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
"messages"
|
"post-training/messages",
|
||||||
|
"eval/question-answer"
|
||||||
],
|
],
|
||||||
"description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training."
|
"description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. - \"eval/question-answer\": The dataset contains a question and answer column."
|
||||||
},
|
},
|
||||||
"data_source": {
|
"source": {
|
||||||
"$ref": "#/components/schemas/DataSource",
|
"$ref": "#/components/schemas/DataSource",
|
||||||
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
|
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"} ] } ] }"
|
||||||
},
|
},
|
||||||
|
@ -9463,8 +9472,8 @@
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"schema",
|
"purpose",
|
||||||
"data_source"
|
"source"
|
||||||
],
|
],
|
||||||
"title": "RegisterDatasetRequest"
|
"title": "RegisterDatasetRequest"
|
||||||
},
|
},
|
||||||
|
|
45
docs/_static/llama-stack-spec.yaml
vendored
45
docs/_static/llama-stack-spec.yaml
vendored
|
@ -4738,13 +4738,14 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: dataset
|
const: dataset
|
||||||
default: dataset
|
default: dataset
|
||||||
schema:
|
purpose:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
- messages
|
- post-training/messages
|
||||||
title: Schema
|
- eval/question-answer
|
||||||
|
title: DatasetPurpose
|
||||||
description: >-
|
description: >-
|
||||||
Schema of the dataset. Each type has a different column format.
|
Purpose of the dataset. Each type has a different column format.
|
||||||
data_source:
|
data_source:
|
||||||
$ref: '#/components/schemas/DataSource'
|
$ref: '#/components/schemas/DataSource'
|
||||||
metadata:
|
metadata:
|
||||||
|
@ -4763,7 +4764,7 @@ components:
|
||||||
- provider_resource_id
|
- provider_resource_id
|
||||||
- provider_id
|
- provider_id
|
||||||
- type
|
- type
|
||||||
- schema
|
- purpose
|
||||||
- data_source
|
- data_source
|
||||||
- metadata
|
- metadata
|
||||||
title: Dataset
|
title: Dataset
|
||||||
|
@ -4774,8 +4775,10 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: huggingface
|
const: huggingface
|
||||||
default: huggingface
|
default: huggingface
|
||||||
dataset_path:
|
path:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa"
|
||||||
params:
|
params:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -4786,12 +4789,14 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: The parameters for the dataset.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- dataset_path
|
- path
|
||||||
- params
|
- params
|
||||||
title: HuggingfaceDataSource
|
title: HuggingfaceDataSource
|
||||||
|
description: A dataset stored in Huggingface.
|
||||||
RowsDataSource:
|
RowsDataSource:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4811,11 +4816,16 @@ components:
|
||||||
- type: string
|
- type: string
|
||||||
- type: array
|
- type: array
|
||||||
- type: object
|
- type: object
|
||||||
|
description: >-
|
||||||
|
The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
|
||||||
|
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
|
||||||
|
world!"}]} ]
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- rows
|
- rows
|
||||||
title: RowsDataSource
|
title: RowsDataSource
|
||||||
|
description: A dataset stored in rows.
|
||||||
URIDataSource:
|
URIDataSource:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -4825,11 +4835,16 @@ components:
|
||||||
default: uri
|
default: uri
|
||||||
uri:
|
uri:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
|
||||||
|
- "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- uri
|
- uri
|
||||||
title: URIDataSource
|
title: URIDataSource
|
||||||
|
description: >-
|
||||||
|
A dataset that can be obtained from a URI.
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6367,14 +6382,16 @@ components:
|
||||||
RegisterDatasetRequest:
|
RegisterDatasetRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
schema:
|
purpose:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
- messages
|
- post-training/messages
|
||||||
|
- eval/question-answer
|
||||||
description: >-
|
description: >-
|
||||||
The schema format of the dataset. One of - messages: The dataset contains
|
The purpose of the dataset. One of - "post-training/messages": The dataset
|
||||||
a messages column with list of messages for post-training.
|
contains a messages column with list of messages for post-training. -
|
||||||
data_source:
|
"eval/question-answer": The dataset contains a question and answer column.
|
||||||
|
source:
|
||||||
$ref: '#/components/schemas/DataSource'
|
$ref: '#/components/schemas/DataSource'
|
||||||
description: >-
|
description: >-
|
||||||
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
|
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
|
||||||
|
@ -6401,8 +6418,8 @@ components:
|
||||||
The ID of the dataset. If not provided, a random ID will be generated.
|
The ID of the dataset. If not provided, a random ID will be generated.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- schema
|
- purpose
|
||||||
- data_source
|
- source
|
||||||
title: RegisterDatasetRequest
|
title: RegisterDatasetRequest
|
||||||
RegisterModelRequest:
|
RegisterModelRequest:
|
||||||
type: object
|
type: object
|
||||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
||||||
class DatasetPurpose(Enum):
|
class DatasetPurpose(Enum):
|
||||||
"""
|
"""
|
||||||
Purpose of the dataset. Each type has a different column format.
|
Purpose of the dataset. Each type has a different column format.
|
||||||
:cvar tuning/messages: The dataset contains messages used for post-training. Examples:
|
:cvar post-training/messages: The dataset contains messages used for post-training. Examples:
|
||||||
{
|
{
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "user", "content": "Hello, world!"},
|
{"role": "user", "content": "Hello, world!"},
|
||||||
|
@ -25,12 +25,19 @@ class DatasetPurpose(Enum):
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tuning_messages = "tuning/messages"
|
post_training_messages = "post-training/messages"
|
||||||
|
eval_question_answer = "eval/question-answer"
|
||||||
|
|
||||||
# TODO: add more schemas here
|
# TODO: add more purposes here
|
||||||
|
|
||||||
|
|
||||||
class DatasetType(Enum):
|
class DatasetType(Enum):
|
||||||
|
"""
|
||||||
|
Type of the dataset source.
|
||||||
|
:cvar huggingface: The dataset is stored in Huggingface.
|
||||||
|
:cvar uri: The dataset can be obtained from a URI.
|
||||||
|
:cvar rows: The dataset is stored in rows.
|
||||||
|
"""
|
||||||
huggingface = "huggingface"
|
huggingface = "huggingface"
|
||||||
uri = "uri"
|
uri = "uri"
|
||||||
rows = "rows"
|
rows = "rows"
|
||||||
|
@ -38,19 +45,36 @@ class DatasetType(Enum):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class URIDataSource(BaseModel):
|
class URIDataSource(BaseModel):
|
||||||
|
"""A dataset that can be obtained from a URI.
|
||||||
|
:param uri: The dataset can be obtained from a URI. E.g.
|
||||||
|
- "https://mywebsite.com/mydata.jsonl"
|
||||||
|
- "lsfs://mydata.jsonl"
|
||||||
|
- "data:csv;base64,{base64_content}"
|
||||||
|
"""
|
||||||
type: Literal["uri"] = "uri"
|
type: Literal["uri"] = "uri"
|
||||||
uri: str
|
uri: str
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class HuggingfaceDataSource(BaseModel):
|
class HuggingfaceDataSource(BaseModel):
|
||||||
|
"""A dataset stored in Huggingface.
|
||||||
|
:param path: The path to the dataset in Huggingface. E.g.
|
||||||
|
- "llamastack/simpleqa"
|
||||||
|
:param params: The parameters for the dataset.
|
||||||
|
"""
|
||||||
type: Literal["huggingface"] = "huggingface"
|
type: Literal["huggingface"] = "huggingface"
|
||||||
dataset_path: str
|
path: str
|
||||||
params: Dict[str, Any]
|
params: Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class RowsDataSource(BaseModel):
|
class RowsDataSource(BaseModel):
|
||||||
|
"""A dataset stored in rows.
|
||||||
|
:param rows: The dataset is stored in rows. E.g.
|
||||||
|
- [
|
||||||
|
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
|
||||||
|
]
|
||||||
|
"""
|
||||||
type: Literal["rows"] = "rows"
|
type: Literal["rows"] = "rows"
|
||||||
rows: List[Dict[str, Any]]
|
rows: List[Dict[str, Any]]
|
||||||
|
|
||||||
|
@ -65,7 +89,10 @@ DataSource = register_schema(
|
||||||
|
|
||||||
|
|
||||||
class CommonDatasetFields(BaseModel):
|
class CommonDatasetFields(BaseModel):
|
||||||
schema: Schema
|
"""
|
||||||
|
Common fields for a dataset.
|
||||||
|
"""
|
||||||
|
purpose: DatasetPurpose
|
||||||
data_source: DataSource
|
data_source: DataSource
|
||||||
metadata: Dict[str, Any] = Field(
|
metadata: Dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
|
@ -108,9 +135,10 @@ class Datasets(Protocol):
|
||||||
"""
|
"""
|
||||||
Register a new dataset.
|
Register a new dataset.
|
||||||
|
|
||||||
:param schema: The schema format of the dataset. One of
|
:param purpose: The purpose of the dataset. One of
|
||||||
- messages: The dataset contains a messages column with list of messages for post-training.
|
- "post-training/messages": The dataset contains a messages column with list of messages for post-training.
|
||||||
:param data_source: The data source of the dataset. Examples:
|
- "eval/question-answer": The dataset contains a question and answer column.
|
||||||
|
:param source: The data source of the dataset. Examples:
|
||||||
- {
|
- {
|
||||||
"type": "uri",
|
"type": "uri",
|
||||||
"uri": "https://mywebsite.com/mydata.jsonl"
|
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue