mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 11:08:20 +00:00
Merge branch 'pr1573' into api_2
This commit is contained in:
commit
0c37951395
4 changed files with 246 additions and 354 deletions
322
docs/_static/llama-stack-spec.html
vendored
322
docs/_static/llama-stack-spec.html
vendored
|
@ -40,75 +40,7 @@
|
|||
}
|
||||
],
|
||||
"paths": {
|
||||
"/v1/datasetio/rows": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PaginatedRowsResult"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"DatasetIO"
|
||||
],
|
||||
"description": "Get a paginated list of rows from a dataset.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "query",
|
||||
"description": "The ID of the dataset to get the rows from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "rows_in_page",
|
||||
"in": "query",
|
||||
"description": "The number of rows to get per page.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "page_token",
|
||||
"in": "query",
|
||||
"description": "The token to get the next page of rows.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "filter_condition",
|
||||
"in": "query",
|
||||
"description": "(Optional) A condition to filter the rows by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"/v1/datasets/{dataset_id}/rows": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -131,7 +63,16 @@
|
|||
"DatasetIO"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
|
@ -2272,6 +2213,76 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/datasets/{dataset_id}/iterrows": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PaginatedRowsResult"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"DatasetIO"
|
||||
],
|
||||
"description": "Get a paginated list of rows from a dataset.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the dataset to get the rows from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "rows_in_page",
|
||||
"in": "query",
|
||||
"description": "The number of rows to get per page.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "page_token",
|
||||
"in": "query",
|
||||
"description": "The token to get the next page of rows.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "filter_condition",
|
||||
"in": "query",
|
||||
"description": "(Optional) A condition to filter the rows by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -3861,9 +3872,6 @@
|
|||
"AppendRowsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
@ -3895,7 +3903,6 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"rows"
|
||||
],
|
||||
"title": "AppendRowsRequest"
|
||||
|
@ -6755,9 +6762,6 @@
|
|||
{
|
||||
"$ref": "#/components/schemas/URIDataSource"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/HuggingfaceDataSource"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
|
@ -6766,7 +6770,6 @@
|
|||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"uri": "#/components/schemas/URIDataSource",
|
||||
"huggingface": "#/components/schemas/HuggingfaceDataSource",
|
||||
"rows": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
}
|
||||
|
@ -6842,65 +6845,6 @@
|
|||
],
|
||||
"title": "Dataset"
|
||||
},
|
||||
"HuggingfaceDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "huggingface",
|
||||
"default": "huggingface",
|
||||
"description": "The type of the data source."
|
||||
},
|
||||
"huggingface": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "The path to the dataset in Huggingface. E.g. - \"llamastack/simpleqa\""
|
||||
},
|
||||
"params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "The parameters for the dataset."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"path",
|
||||
"params"
|
||||
],
|
||||
"description": "The fields for a Huggingface dataset."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"huggingface"
|
||||
],
|
||||
"title": "HuggingfaceDataSource",
|
||||
"description": "A dataset stored in Huggingface."
|
||||
},
|
||||
"RowsDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7034,56 +6978,7 @@
|
|||
],
|
||||
"title": "ModelType"
|
||||
},
|
||||
"PaginatedRowsResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows in the current page."
|
||||
},
|
||||
"total_count": {
|
||||
"type": "integer",
|
||||
"description": "The total number of rows in the dataset."
|
||||
},
|
||||
"next_page_token": {
|
||||
"type": "string",
|
||||
"description": "The token to get the next page of rows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"rows",
|
||||
"total_count"
|
||||
],
|
||||
"title": "PaginatedRowsResult",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"AnswerCorrectnessScoringFn": {
|
||||
"AgentTurnInputType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
|
@ -8537,6 +8432,55 @@
|
|||
],
|
||||
"title": "ToolInvocationResult"
|
||||
},
|
||||
"PaginatedRowsResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows in the current page."
|
||||
},
|
||||
"total_count": {
|
||||
"type": "integer",
|
||||
"description": "The total number of rows in the dataset."
|
||||
},
|
||||
"next_page_token": {
|
||||
"type": "string",
|
||||
"description": "The token to get the next page of rows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"rows",
|
||||
"total_count"
|
||||
],
|
||||
"title": "PaginatedRowsResult",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"ListAgentSessionsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -9884,7 +9828,7 @@
|
|||
},
|
||||
"source": {
|
||||
"$ref": "#/components/schemas/DataSource",
|
||||
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
|
||||
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -9914,7 +9858,7 @@
|
|||
},
|
||||
"dataset_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset. If not provided, a random ID will be generated."
|
||||
"description": "The ID of the dataset. If not provided, an ID will be generated."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
220
docs/_static/llama-stack-spec.yaml
vendored
220
docs/_static/llama-stack-spec.yaml
vendored
|
@ -10,56 +10,7 @@ info:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
paths:
|
||||
/v1/datasetio/rows:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PaginatedRowsResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: query
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: rows_in_page
|
||||
in: query
|
||||
description: The number of rows to get per page.
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: page_token
|
||||
in: query
|
||||
description: The token to get the next page of rows.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: filter_condition
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) A condition to filter the rows by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
/v1/datasets/{dataset_id}/rows:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -77,7 +28,12 @@ paths:
|
|||
tags:
|
||||
- DatasetIO
|
||||
description: ''
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
|
@ -1529,6 +1485,56 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
/v1/datasets/{dataset_id}/iterrows:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PaginatedRowsResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: rows_in_page
|
||||
in: query
|
||||
description: The number of rows to get per page.
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: page_token
|
||||
in: query
|
||||
description: The token to get the next page of rows.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: filter_condition
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) A condition to filter the rows by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -2636,8 +2642,6 @@ components:
|
|||
AppendRowsRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
|
@ -2652,7 +2656,6 @@ components:
|
|||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- rows
|
||||
title: AppendRowsRequest
|
||||
CompletionMessage:
|
||||
|
@ -4679,13 +4682,11 @@ components:
|
|||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/HuggingfaceDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
huggingface: '#/components/schemas/HuggingfaceDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
Dataset:
|
||||
type: object
|
||||
|
@ -4734,43 +4735,6 @@ components:
|
|||
- source
|
||||
- metadata
|
||||
title: Dataset
|
||||
HuggingfaceDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: huggingface
|
||||
default: huggingface
|
||||
description: The type of the data source.
|
||||
huggingface:
|
||||
type: object
|
||||
properties:
|
||||
path:
|
||||
type: string
|
||||
description: >-
|
||||
The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa"
|
||||
params:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The parameters for the dataset.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- path
|
||||
- params
|
||||
description: The fields for a Huggingface dataset.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- huggingface
|
||||
title: HuggingfaceDataSource
|
||||
description: A dataset stored in Huggingface.
|
||||
RowsDataSource:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4860,35 +4824,7 @@ components:
|
|||
- llm
|
||||
- embedding
|
||||
title: ModelType
|
||||
PaginatedRowsResult:
|
||||
type: object
|
||||
properties:
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
total_count:
|
||||
type: integer
|
||||
description: The total number of rows in the dataset.
|
||||
next_page_token:
|
||||
type: string
|
||||
description: The token to get the next page of rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- rows
|
||||
- total_count
|
||||
title: PaginatedRowsResult
|
||||
description: A paginated list of rows from a dataset.
|
||||
AnswerCorrectnessScoringFn:
|
||||
AgentTurnInputType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
|
@ -5883,6 +5819,34 @@ components:
|
|||
required:
|
||||
- content
|
||||
title: ToolInvocationResult
|
||||
PaginatedRowsResult:
|
||||
type: object
|
||||
properties:
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
total_count:
|
||||
type: integer
|
||||
description: The total number of rows in the dataset.
|
||||
next_page_token:
|
||||
type: string
|
||||
description: The token to get the next page of rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- rows
|
||||
- total_count
|
||||
title: PaginatedRowsResult
|
||||
description: A paginated list of rows from a dataset.
|
||||
ListAgentSessionsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6804,11 +6768,11 @@ components:
|
|||
$ref: '#/components/schemas/DataSource'
|
||||
description: >-
|
||||
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
|
||||
} - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface",
|
||||
"huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split":
|
||||
"train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user",
|
||||
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
|
||||
world!"}, ] } ] }
|
||||
} - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri",
|
||||
"uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
|
||||
"huggingface://llamastack/simpleqa?split=train" } - { "type": "rows",
|
||||
"rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"}, ] } ] }
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -6824,7 +6788,7 @@ components:
|
|||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset. If not provided, a random ID will be generated.
|
||||
The ID of the dataset. If not provided, an ID will be generated.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- purpose
|
||||
|
|
|
@ -37,8 +37,8 @@ class DatasetIO(Protocol):
|
|||
# keeping for aligning with inference/safety, but this is not used
|
||||
dataset_store: DatasetStore
|
||||
|
||||
@webmethod(route="/datasetio/rows", method="GET")
|
||||
async def get_rows_paginated(
|
||||
@webmethod(route="/datasets/{dataset_id}/iterrows", method="GET")
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
rows_in_page: int,
|
||||
|
@ -54,5 +54,7 @@ class DatasetIO(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasetio/rows", method="POST")
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
|
||||
@webmethod(route="/datasets/{dataset_id}/rows", method="POST")
|
||||
async def append_rows(
|
||||
self, dataset_id: str, rows: List[Dict[str, Any]]
|
||||
) -> None: ...
|
||||
|
|
|
@ -60,6 +60,7 @@ class DatasetPurpose(Enum):
|
|||
"answer": "Paris"
|
||||
}
|
||||
"""
|
||||
|
||||
post_training_messages = "post-training/messages"
|
||||
|
||||
eval_question_answer = "eval/question-answer"
|
||||
|
@ -75,11 +76,10 @@ class DatasetPurpose(Enum):
|
|||
class DatasetType(Enum):
|
||||
"""
|
||||
Type of the dataset source.
|
||||
:cvar huggingface: The dataset is stored in Huggingface.
|
||||
:cvar uri: The dataset can be obtained from a URI.
|
||||
:cvar rows: The dataset is stored in rows.
|
||||
:cvar uri: The dataset can be obtained from a URI.
|
||||
:cvar rows: The dataset is stored in rows.
|
||||
"""
|
||||
huggingface = "huggingface"
|
||||
|
||||
uri = "uri"
|
||||
rows = "rows"
|
||||
|
||||
|
@ -92,30 +92,11 @@ class URIDataSource(BaseModel):
|
|||
- "lsfs://mydata.jsonl"
|
||||
- "data:csv;base64,{base64_content}"
|
||||
"""
|
||||
|
||||
type: Literal["uri"] = "uri"
|
||||
uri: str
|
||||
|
||||
|
||||
class HuggingfaceDataSourceFields(BaseModel):
|
||||
"""The fields for a Huggingface dataset.
|
||||
:param path: The path to the dataset in Huggingface. E.g.
|
||||
- "llamastack/simpleqa"
|
||||
:param params: The parameters for the dataset.
|
||||
"""
|
||||
path: str
|
||||
params: Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class HuggingfaceDataSource(BaseModel):
|
||||
"""A dataset stored in Huggingface.
|
||||
:param type: The type of the data source.
|
||||
:param huggingface: The fields for a Huggingface dataset.
|
||||
"""
|
||||
type: Literal["huggingface"] = "huggingface"
|
||||
huggingface: HuggingfaceDataSourceFields
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RowsDataSource(BaseModel):
|
||||
"""A dataset stored in rows.
|
||||
|
@ -124,13 +105,14 @@ class RowsDataSource(BaseModel):
|
|||
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
|
||||
]
|
||||
"""
|
||||
|
||||
type: Literal["rows"] = "rows"
|
||||
rows: List[Dict[str, Any]]
|
||||
|
||||
|
||||
DataSource = register_schema(
|
||||
Annotated[
|
||||
Union[URIDataSource, HuggingfaceDataSource, RowsDataSource],
|
||||
Union[URIDataSource, RowsDataSource],
|
||||
Field(discriminator="type"),
|
||||
],
|
||||
name="DataSource",
|
||||
|
@ -141,6 +123,7 @@ class CommonDatasetFields(BaseModel):
|
|||
"""
|
||||
Common fields for a dataset.
|
||||
"""
|
||||
|
||||
purpose: DatasetPurpose
|
||||
source: DataSource
|
||||
metadata: Dict[str, Any] = Field(
|
||||
|
@ -237,13 +220,12 @@ class Datasets(Protocol):
|
|||
"uri": "lsfs://mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "huggingface",
|
||||
"huggingface": {
|
||||
"dataset_path": "tatsu-lab/alpaca",
|
||||
"params": {
|
||||
"split": "train"
|
||||
}
|
||||
}
|
||||
"type": "uri",
|
||||
"uri": "data:csv;base64,{base64_content}"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
}
|
||||
- {
|
||||
"type": "rows",
|
||||
|
@ -258,7 +240,7 @@ class Datasets(Protocol):
|
|||
}
|
||||
:param metadata: The metadata for the dataset.
|
||||
- E.g. {"description": "My dataset"}
|
||||
:param dataset_id: The ID of the dataset. If not provided, a random ID will be generated.
|
||||
:param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
|
||||
"""
|
||||
...
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue