Merge branch 'pr1573' into api_2

This commit is contained in:
Xi Yan 2025-03-12 00:19:25 -07:00
commit af4216f34f
3 changed files with 58 additions and 58 deletions

View file

@ -2518,7 +2518,7 @@
"tags": [
"Datasets"
],
"description": "Register a new dataset through a file or",
"description": "Register a new dataset.",
"parameters": [],
"requestBody": {
"content": {
@ -7144,24 +7144,24 @@
],
"title": "Benchmark"
},
"DataReference": {
"DataSource": {
"oneOf": [
{
"$ref": "#/components/schemas/URIDataReference"
"$ref": "#/components/schemas/URIDataSource"
},
{
"$ref": "#/components/schemas/HuggingfaceDataReference"
"$ref": "#/components/schemas/HuggingfaceDataSource"
},
{
"$ref": "#/components/schemas/RowsDataReference"
"$ref": "#/components/schemas/RowsDataSource"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"uri": "#/components/schemas/URIDataReference",
"huggingface": "#/components/schemas/HuggingfaceDataReference",
"rows": "#/components/schemas/RowsDataReference"
"uri": "#/components/schemas/URIDataSource",
"huggingface": "#/components/schemas/HuggingfaceDataSource",
"rows": "#/components/schemas/RowsDataSource"
}
}
},
@ -7185,13 +7185,13 @@
"schema": {
"type": "string",
"enum": [
"jsonl_messages"
"messages"
],
"title": "Schema",
"description": "Schema of the dataset. Each type has a different column format."
},
"data_reference": {
"$ref": "#/components/schemas/DataReference"
"data_source": {
"$ref": "#/components/schemas/DataSource"
},
"metadata": {
"type": "object",
@ -7226,12 +7226,12 @@
"provider_id",
"type",
"schema",
"data_reference",
"data_source",
"metadata"
],
"title": "Dataset"
},
"HuggingfaceDataReference": {
"HuggingfaceDataSource": {
"type": "object",
"properties": {
"type": {
@ -7274,9 +7274,9 @@
"dataset_path",
"params"
],
"title": "HuggingfaceDataReference"
"title": "HuggingfaceDataSource"
},
"RowsDataReference": {
"RowsDataSource": {
"type": "object",
"properties": {
"type": {
@ -7318,9 +7318,9 @@
"type",
"rows"
],
"title": "RowsDataReference"
"title": "RowsDataSource"
},
"URIDataReference": {
"URIDataSource": {
"type": "object",
"properties": {
"type": {
@ -7337,7 +7337,7 @@
"type",
"uri"
],
"title": "URIDataReference"
"title": "URIDataSource"
},
"Model": {
"type": "object",
@ -9506,9 +9506,9 @@
"schema": {
"type": "string",
"enum": [
"jsonl_messages"
"messages"
],
"description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
"description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training."
},
"data_source": {
"$ref": "#/components/schemas/DataSource",
@ -9548,7 +9548,7 @@
"additionalProperties": false,
"required": [
"schema",
"data_reference"
"data_source"
],
"title": "RegisterDatasetRequest"
},

View file

@ -1698,7 +1698,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Datasets
description: Register a new dataset through a file or
description: Register a new dataset.
parameters: []
requestBody:
content:
@ -4974,17 +4974,17 @@ components:
- scoring_functions
- metadata
title: Benchmark
DataReference:
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataReference'
- $ref: '#/components/schemas/HuggingfaceDataReference'
- $ref: '#/components/schemas/RowsDataReference'
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/HuggingfaceDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataReference'
huggingface: '#/components/schemas/HuggingfaceDataReference'
rows: '#/components/schemas/RowsDataReference'
uri: '#/components/schemas/URIDataSource'
huggingface: '#/components/schemas/HuggingfaceDataSource'
rows: '#/components/schemas/RowsDataSource'
Dataset:
type: object
properties:
@ -5001,12 +5001,12 @@ components:
schema:
type: string
enum:
- jsonl_messages
- messages
title: Schema
description: >-
Schema of the dataset. Each type has a different column format.
data_reference:
$ref: '#/components/schemas/DataReference'
data_source:
$ref: '#/components/schemas/DataSource'
metadata:
type: object
additionalProperties:
@ -5024,10 +5024,10 @@ components:
- provider_id
- type
- schema
- data_reference
- data_source
- metadata
title: Dataset
HuggingfaceDataReference:
HuggingfaceDataSource:
type: object
properties:
type:
@ -5051,8 +5051,8 @@ components:
- type
- dataset_path
- params
title: HuggingfaceDataReference
RowsDataReference:
title: HuggingfaceDataSource
RowsDataSource:
type: object
properties:
type:
@ -5075,8 +5075,8 @@ components:
required:
- type
- rows
title: RowsDataReference
URIDataReference:
title: RowsDataSource
URIDataSource:
type: object
properties:
type:
@ -5089,7 +5089,7 @@ components:
required:
- type
- uri
title: URIDataReference
title: URIDataSource
Model:
type: object
properties:
@ -6472,12 +6472,12 @@ components:
schema:
type: string
enum:
- jsonl_messages
- messages
description: >-
The schema format of the dataset. One of - jsonl_messages: The dataset
is a JSONL file with messages in column format
data_reference:
$ref: '#/components/schemas/DataReference'
The schema format of the dataset. One of - messages: The dataset contains
a messages column with list of messages for post-training.
data_source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Examples: - { "type": "uri", "uri":
"https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl"
@ -6504,7 +6504,7 @@ components:
additionalProperties: false
required:
- schema
- data_reference
- data_source
title: RegisterDatasetRequest
RegisterModelRequest:
type: object

View file

@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
class Schema(Enum):
"""
Schema of the dataset. Each type has a different column format.
:cvar jsonl_messages: The dataset is a JSONL file with messages. Examples:
:cvar messages: The dataset contains messages used for post-training. Examples:
{
"messages": [
{"role": "user", "content": "Hello, world!"},
@ -25,7 +25,7 @@ class Schema(Enum):
}
"""
jsonl_messages = "jsonl_messages"
messages = "messages"
# TODO: add more schemas here
@ -36,36 +36,36 @@ class DatasetType(Enum):
@json_schema_type
class URIDataReference(BaseModel):
class URIDataSource(BaseModel):
type: Literal["uri"] = "uri"
uri: str
@json_schema_type
class HuggingfaceDataReference(BaseModel):
class HuggingfaceDataSource(BaseModel):
type: Literal["huggingface"] = "huggingface"
dataset_path: str
params: Dict[str, Any]
@json_schema_type
class RowsDataReference(BaseModel):
class RowsDataSource(BaseModel):
type: Literal["rows"] = "rows"
rows: List[Dict[str, Any]]
DataReference = register_schema(
DataSource = register_schema(
Annotated[
Union[URIDataReference, HuggingfaceDataReference, RowsDataReference],
Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
Field(discriminator="type"),
],
name="DataReference",
name="DataSource",
)
class CommonDatasetFields(BaseModel):
schema: Schema
data_reference: DataReference
data_source: DataSource
metadata: Dict[str, Any] = Field(
default_factory=dict,
description="Any additional metadata for this dataset",
@ -100,16 +100,16 @@ class Datasets(Protocol):
async def register_dataset(
self,
schema: Schema,
data_reference: DataReference,
data_source: DataSource,
metadata: Optional[Dict[str, Any]] = None,
dataset_id: Optional[str] = None,
) -> Dataset:
"""
Register a new dataset through a file or
Register a new dataset.
:param schema: The schema format of the dataset. One of
- jsonl_messages: The dataset is a JSONL file with messages in column format
:param data_reference: The data reference of the dataset. Examples:
- messages: The dataset contains a messages column with list of messages for post-training.
:param data_source: The data source of the dataset. Examples:
- {
"type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl"