mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 19:12:09 +00:00
Merge branch 'pr1573' into api_2
This commit is contained in:
commit
af4216f34f
3 changed files with 58 additions and 58 deletions
42
docs/_static/llama-stack-spec.html
vendored
42
docs/_static/llama-stack-spec.html
vendored
|
@ -2518,7 +2518,7 @@
|
|||
"tags": [
|
||||
"Datasets"
|
||||
],
|
||||
"description": "Register a new dataset through a file or",
|
||||
"description": "Register a new dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -7144,24 +7144,24 @@
|
|||
],
|
||||
"title": "Benchmark"
|
||||
},
|
||||
"DataReference": {
|
||||
"DataSource": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/URIDataReference"
|
||||
"$ref": "#/components/schemas/URIDataSource"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/HuggingfaceDataReference"
|
||||
"$ref": "#/components/schemas/HuggingfaceDataSource"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/RowsDataReference"
|
||||
"$ref": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"uri": "#/components/schemas/URIDataReference",
|
||||
"huggingface": "#/components/schemas/HuggingfaceDataReference",
|
||||
"rows": "#/components/schemas/RowsDataReference"
|
||||
"uri": "#/components/schemas/URIDataSource",
|
||||
"huggingface": "#/components/schemas/HuggingfaceDataSource",
|
||||
"rows": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -7185,13 +7185,13 @@
|
|||
"schema": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"jsonl_messages"
|
||||
"messages"
|
||||
],
|
||||
"title": "Schema",
|
||||
"description": "Schema of the dataset. Each type has a different column format."
|
||||
},
|
||||
"data_reference": {
|
||||
"$ref": "#/components/schemas/DataReference"
|
||||
"data_source": {
|
||||
"$ref": "#/components/schemas/DataSource"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -7226,12 +7226,12 @@
|
|||
"provider_id",
|
||||
"type",
|
||||
"schema",
|
||||
"data_reference",
|
||||
"data_source",
|
||||
"metadata"
|
||||
],
|
||||
"title": "Dataset"
|
||||
},
|
||||
"HuggingfaceDataReference": {
|
||||
"HuggingfaceDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
|
@ -7274,9 +7274,9 @@
|
|||
"dataset_path",
|
||||
"params"
|
||||
],
|
||||
"title": "HuggingfaceDataReference"
|
||||
"title": "HuggingfaceDataSource"
|
||||
},
|
||||
"RowsDataReference": {
|
||||
"RowsDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
|
@ -7318,9 +7318,9 @@
|
|||
"type",
|
||||
"rows"
|
||||
],
|
||||
"title": "RowsDataReference"
|
||||
"title": "RowsDataSource"
|
||||
},
|
||||
"URIDataReference": {
|
||||
"URIDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
|
@ -7337,7 +7337,7 @@
|
|||
"type",
|
||||
"uri"
|
||||
],
|
||||
"title": "URIDataReference"
|
||||
"title": "URIDataSource"
|
||||
},
|
||||
"Model": {
|
||||
"type": "object",
|
||||
|
@ -9506,9 +9506,9 @@
|
|||
"schema": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"jsonl_messages"
|
||||
"messages"
|
||||
],
|
||||
"description": "The schema format of the dataset. One of - jsonl_messages: The dataset is a JSONL file with messages in column format"
|
||||
"description": "The schema format of the dataset. One of - messages: The dataset contains a messages column with list of messages for post-training."
|
||||
},
|
||||
"data_reference": {
|
||||
"$ref": "#/components/schemas/DataReference",
|
||||
|
@ -9548,7 +9548,7 @@
|
|||
"additionalProperties": false,
|
||||
"required": [
|
||||
"schema",
|
||||
"data_reference"
|
||||
"data_source"
|
||||
],
|
||||
"title": "RegisterDatasetRequest"
|
||||
},
|
||||
|
|
48
docs/_static/llama-stack-spec.yaml
vendored
48
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1698,7 +1698,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Datasets
|
||||
description: Register a new dataset through a file or
|
||||
description: Register a new dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -4974,17 +4974,17 @@ components:
|
|||
- scoring_functions
|
||||
- metadata
|
||||
title: Benchmark
|
||||
DataReference:
|
||||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataReference'
|
||||
- $ref: '#/components/schemas/HuggingfaceDataReference'
|
||||
- $ref: '#/components/schemas/RowsDataReference'
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/HuggingfaceDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataReference'
|
||||
huggingface: '#/components/schemas/HuggingfaceDataReference'
|
||||
rows: '#/components/schemas/RowsDataReference'
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
huggingface: '#/components/schemas/HuggingfaceDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5001,12 +5001,12 @@ components:
|
|||
schema:
|
||||
type: string
|
||||
enum:
|
||||
- jsonl_messages
|
||||
- messages
|
||||
title: Schema
|
||||
description: >-
|
||||
Schema of the dataset. Each type has a different column format.
|
||||
data_reference:
|
||||
$ref: '#/components/schemas/DataReference'
|
||||
data_source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -5024,10 +5024,10 @@ components:
|
|||
- provider_id
|
||||
- type
|
||||
- schema
|
||||
- data_reference
|
||||
- data_source
|
||||
- metadata
|
||||
title: Dataset
|
||||
HuggingfaceDataReference:
|
||||
HuggingfaceDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
|
@ -5051,8 +5051,8 @@ components:
|
|||
- type
|
||||
- dataset_path
|
||||
- params
|
||||
title: HuggingfaceDataReference
|
||||
RowsDataReference:
|
||||
title: HuggingfaceDataSource
|
||||
RowsDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
|
@ -5075,8 +5075,8 @@ components:
|
|||
required:
|
||||
- type
|
||||
- rows
|
||||
title: RowsDataReference
|
||||
URIDataReference:
|
||||
title: RowsDataSource
|
||||
URIDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
|
@ -5089,7 +5089,7 @@ components:
|
|||
required:
|
||||
- type
|
||||
- uri
|
||||
title: URIDataReference
|
||||
title: URIDataSource
|
||||
Model:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6472,12 +6472,12 @@ components:
|
|||
schema:
|
||||
type: string
|
||||
enum:
|
||||
- jsonl_messages
|
||||
- messages
|
||||
description: >-
|
||||
The schema format of the dataset. One of - jsonl_messages: The dataset
|
||||
is a JSONL file with messages in column format
|
||||
data_reference:
|
||||
$ref: '#/components/schemas/DataReference'
|
||||
The schema format of the dataset. One of - messages: The dataset contains
|
||||
a messages column with list of messages for post-training.
|
||||
data_source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
description: >-
|
||||
The data source of the dataset. Examples: - { "type": "uri", "uri":
|
||||
"https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": "lsfs://mydata.jsonl"
|
||||
|
@ -6504,7 +6504,7 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- schema
|
||||
- data_reference
|
||||
- data_source
|
||||
title: RegisterDatasetRequest
|
||||
RegisterModelRequest:
|
||||
type: object
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
|||
class Schema(Enum):
|
||||
"""
|
||||
Schema of the dataset. Each type has a different column format.
|
||||
:cvar jsonl_messages: The dataset is a JSONL file with messages. Examples:
|
||||
:cvar messages: The dataset contains messages used for post-training. Examples:
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
|
@ -25,7 +25,7 @@ class Schema(Enum):
|
|||
}
|
||||
"""
|
||||
|
||||
jsonl_messages = "jsonl_messages"
|
||||
messages = "messages"
|
||||
# TODO: add more schemas here
|
||||
|
||||
|
||||
|
@ -36,36 +36,36 @@ class DatasetType(Enum):
|
|||
|
||||
|
||||
@json_schema_type
|
||||
class URIDataReference(BaseModel):
|
||||
class URIDataSource(BaseModel):
|
||||
type: Literal["uri"] = "uri"
|
||||
uri: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class HuggingfaceDataReference(BaseModel):
|
||||
class HuggingfaceDataSource(BaseModel):
|
||||
type: Literal["huggingface"] = "huggingface"
|
||||
dataset_path: str
|
||||
params: Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RowsDataReference(BaseModel):
|
||||
class RowsDataSource(BaseModel):
|
||||
type: Literal["rows"] = "rows"
|
||||
rows: List[Dict[str, Any]]
|
||||
|
||||
|
||||
DataReference = register_schema(
|
||||
DataSource = register_schema(
|
||||
Annotated[
|
||||
Union[URIDataReference, HuggingfaceDataReference, RowsDataReference],
|
||||
Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
|
||||
Field(discriminator="type"),
|
||||
],
|
||||
name="DataReference",
|
||||
name="DataSource",
|
||||
)
|
||||
|
||||
|
||||
class CommonDatasetFields(BaseModel):
|
||||
schema: Schema
|
||||
data_reference: DataReference
|
||||
data_source: DataSource
|
||||
metadata: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this dataset",
|
||||
|
@ -100,16 +100,16 @@ class Datasets(Protocol):
|
|||
async def register_dataset(
|
||||
self,
|
||||
schema: Schema,
|
||||
data_reference: DataReference,
|
||||
data_source: DataSource,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
dataset_id: Optional[str] = None,
|
||||
) -> Dataset:
|
||||
"""
|
||||
Register a new dataset through a file or
|
||||
Register a new dataset.
|
||||
|
||||
:param schema: The schema format of the dataset. One of
|
||||
- jsonl_messages: The dataset is a JSONL file with messages in column format
|
||||
:param data_reference: The data reference of the dataset. Examples:
|
||||
- messages: The dataset contains a messages column with list of messages for post-training.
|
||||
:param data_source: The data source of the dataset. Examples:
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue