Merge branch 'pr1573' into api_2

This commit is contained in:
Xi Yan 2025-03-13 14:49:04 -07:00
commit 0c37951395
4 changed files with 246 additions and 354 deletions

View file

@ -40,75 +40,7 @@
} }
], ],
"paths": { "paths": {
"/v1/datasetio/rows": { "/v1/datasets/{dataset_id}/rows": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PaginatedRowsResult"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"DatasetIO"
],
"description": "Get a paginated list of rows from a dataset.",
"parameters": [
{
"name": "dataset_id",
"in": "query",
"description": "The ID of the dataset to get the rows from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "rows_in_page",
"in": "query",
"description": "The number of rows to get per page.",
"required": true,
"schema": {
"type": "integer"
}
},
{
"name": "page_token",
"in": "query",
"description": "The token to get the next page of rows.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "filter_condition",
"in": "query",
"description": "(Optional) A condition to filter the rows by.",
"required": false,
"schema": {
"type": "string"
}
}
]
},
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -131,7 +63,16 @@
"DatasetIO" "DatasetIO"
], ],
"description": "", "description": "",
"parameters": [], "parameters": [
{
"name": "dataset_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": { "requestBody": {
"content": { "content": {
"application/json": { "application/json": {
@ -2272,6 +2213,76 @@
} }
} }
}, },
"/v1/datasets/{dataset_id}/iterrows": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/PaginatedRowsResult"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"DatasetIO"
],
"description": "Get a paginated list of rows from a dataset.",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"description": "The ID of the dataset to get the rows from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "rows_in_page",
"in": "query",
"description": "The number of rows to get per page.",
"required": true,
"schema": {
"type": "integer"
}
},
{
"name": "page_token",
"in": "query",
"description": "The token to get the next page of rows.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "filter_condition",
"in": "query",
"description": "(Optional) A condition to filter the rows by.",
"required": false,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
"get": { "get": {
"responses": { "responses": {
@ -3861,9 +3872,6 @@
"AppendRowsRequest": { "AppendRowsRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"dataset_id": {
"type": "string"
},
"rows": { "rows": {
"type": "array", "type": "array",
"items": { "items": {
@ -3895,7 +3903,6 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"dataset_id",
"rows" "rows"
], ],
"title": "AppendRowsRequest" "title": "AppendRowsRequest"
@ -6755,9 +6762,6 @@
{ {
"$ref": "#/components/schemas/URIDataSource" "$ref": "#/components/schemas/URIDataSource"
}, },
{
"$ref": "#/components/schemas/HuggingfaceDataSource"
},
{ {
"$ref": "#/components/schemas/RowsDataSource" "$ref": "#/components/schemas/RowsDataSource"
} }
@ -6766,7 +6770,6 @@
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"uri": "#/components/schemas/URIDataSource", "uri": "#/components/schemas/URIDataSource",
"huggingface": "#/components/schemas/HuggingfaceDataSource",
"rows": "#/components/schemas/RowsDataSource" "rows": "#/components/schemas/RowsDataSource"
} }
} }
@ -6842,65 +6845,6 @@
], ],
"title": "Dataset" "title": "Dataset"
}, },
"HuggingfaceDataSource": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "huggingface",
"default": "huggingface",
"description": "The type of the data source."
},
"huggingface": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "The path to the dataset in Huggingface. E.g. - \"llamastack/simpleqa\""
},
"params": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "The parameters for the dataset."
}
},
"additionalProperties": false,
"required": [
"path",
"params"
],
"description": "The fields for a Huggingface dataset."
}
},
"additionalProperties": false,
"required": [
"type",
"huggingface"
],
"title": "HuggingfaceDataSource",
"description": "A dataset stored in Huggingface."
},
"RowsDataSource": { "RowsDataSource": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -7034,56 +6978,7 @@
], ],
"title": "ModelType" "title": "ModelType"
}, },
"PaginatedRowsResult": { "AgentTurnInputType": {
"type": "object",
"properties": {
"rows": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The rows in the current page."
},
"total_count": {
"type": "integer",
"description": "The total number of rows in the dataset."
},
"next_page_token": {
"type": "string",
"description": "The token to get the next page of rows."
}
},
"additionalProperties": false,
"required": [
"rows",
"total_count"
],
"title": "PaginatedRowsResult",
"description": "A paginated list of rows from a dataset."
},
"AnswerCorrectnessScoringFn": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": { "type": {
@ -8537,6 +8432,55 @@
], ],
"title": "ToolInvocationResult" "title": "ToolInvocationResult"
}, },
"PaginatedRowsResult": {
"type": "object",
"properties": {
"rows": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The rows in the current page."
},
"total_count": {
"type": "integer",
"description": "The total number of rows in the dataset."
},
"next_page_token": {
"type": "string",
"description": "The token to get the next page of rows."
}
},
"additionalProperties": false,
"required": [
"rows",
"total_count"
],
"title": "PaginatedRowsResult",
"description": "A paginated list of rows from a dataset."
},
"ListAgentSessionsResponse": { "ListAgentSessionsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9884,7 +9828,7 @@
}, },
"source": { "source": {
"$ref": "#/components/schemas/DataSource", "$ref": "#/components/schemas/DataSource",
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"huggingface\", \"huggingface\": { \"dataset_path\": \"tatsu-lab/alpaca\", \"params\": { \"split\": \"train\" } } } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -9914,7 +9858,7 @@
}, },
"dataset_id": { "dataset_id": {
"type": "string", "type": "string",
"description": "The ID of the dataset. If not provided, a random ID will be generated." "description": "The ID of the dataset. If not provided, an ID will be generated."
} }
}, },
"additionalProperties": false, "additionalProperties": false,

View file

@ -10,56 +10,7 @@ info:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
paths: paths:
/v1/datasetio/rows: /v1/datasets/{dataset_id}/rows:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/PaginatedRowsResult'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
description: >-
Get a paginated list of rows from a dataset.
parameters:
- name: dataset_id
in: query
description: >-
The ID of the dataset to get the rows from.
required: true
schema:
type: string
- name: rows_in_page
in: query
description: The number of rows to get per page.
required: true
schema:
type: integer
- name: page_token
in: query
description: The token to get the next page of rows.
required: false
schema:
type: string
- name: filter_condition
in: query
description: >-
(Optional) A condition to filter the rows by.
required: false
schema:
type: string
post: post:
responses: responses:
'200': '200':
@ -77,7 +28,12 @@ paths:
tags: tags:
- DatasetIO - DatasetIO
description: '' description: ''
parameters: [] parameters:
- name: dataset_id
in: path
required: true
schema:
type: string
requestBody: requestBody:
content: content:
application/json: application/json:
@ -1529,6 +1485,56 @@ paths:
schema: schema:
$ref: '#/components/schemas/InvokeToolRequest' $ref: '#/components/schemas/InvokeToolRequest'
required: true required: true
/v1/datasets/{dataset_id}/iterrows:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/PaginatedRowsResult'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
description: >-
Get a paginated list of rows from a dataset.
parameters:
- name: dataset_id
in: path
description: >-
The ID of the dataset to get the rows from.
required: true
schema:
type: string
- name: rows_in_page
in: query
description: The number of rows to get per page.
required: true
schema:
type: integer
- name: page_token
in: query
description: The token to get the next page of rows.
required: false
schema:
type: string
- name: filter_condition
in: query
description: >-
(Optional) A condition to filter the rows by.
required: false
schema:
type: string
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get: get:
responses: responses:
@ -2636,8 +2642,6 @@ components:
AppendRowsRequest: AppendRowsRequest:
type: object type: object
properties: properties:
dataset_id:
type: string
rows: rows:
type: array type: array
items: items:
@ -2652,7 +2656,6 @@ components:
- type: object - type: object
additionalProperties: false additionalProperties: false
required: required:
- dataset_id
- rows - rows
title: AppendRowsRequest title: AppendRowsRequest
CompletionMessage: CompletionMessage:
@ -4679,13 +4682,11 @@ components:
DataSource: DataSource:
oneOf: oneOf:
- $ref: '#/components/schemas/URIDataSource' - $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/HuggingfaceDataSource'
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
uri: '#/components/schemas/URIDataSource' uri: '#/components/schemas/URIDataSource'
huggingface: '#/components/schemas/HuggingfaceDataSource'
rows: '#/components/schemas/RowsDataSource' rows: '#/components/schemas/RowsDataSource'
Dataset: Dataset:
type: object type: object
@ -4734,43 +4735,6 @@ components:
- source - source
- metadata - metadata
title: Dataset title: Dataset
HuggingfaceDataSource:
type: object
properties:
type:
type: string
const: huggingface
default: huggingface
description: The type of the data source.
huggingface:
type: object
properties:
path:
type: string
description: >-
The path to the dataset in Huggingface. E.g. - "llamastack/simpleqa"
params:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The parameters for the dataset.
additionalProperties: false
required:
- path
- params
description: The fields for a Huggingface dataset.
additionalProperties: false
required:
- type
- huggingface
title: HuggingfaceDataSource
description: A dataset stored in Huggingface.
RowsDataSource: RowsDataSource:
type: object type: object
properties: properties:
@ -4860,35 +4824,7 @@ components:
- llm - llm
- embedding - embedding
title: ModelType title: ModelType
PaginatedRowsResult: AgentTurnInputType:
type: object
properties:
rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows in the current page.
total_count:
type: integer
description: The total number of rows in the dataset.
next_page_token:
type: string
description: The token to get the next page of rows.
additionalProperties: false
required:
- rows
- total_count
title: PaginatedRowsResult
description: A paginated list of rows from a dataset.
AnswerCorrectnessScoringFn:
type: object type: object
properties: properties:
type: type:
@ -5883,6 +5819,34 @@ components:
required: required:
- content - content
title: ToolInvocationResult title: ToolInvocationResult
PaginatedRowsResult:
type: object
properties:
rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows in the current page.
total_count:
type: integer
description: The total number of rows in the dataset.
next_page_token:
type: string
description: The token to get the next page of rows.
additionalProperties: false
required:
- rows
- total_count
title: PaginatedRowsResult
description: A paginated list of rows from a dataset.
ListAgentSessionsResponse: ListAgentSessionsResponse:
type: object type: object
properties: properties:
@ -6804,11 +6768,11 @@ components:
$ref: '#/components/schemas/DataSource' $ref: '#/components/schemas/DataSource'
description: >- description: >-
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
} - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "huggingface", } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri",
"huggingface": { "dataset_path": "tatsu-lab/alpaca", "params": { "split": "uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
"train" } } } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows",
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello, "rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"},
world!"}, ] } ] } {"role": "assistant", "content": "Hello, world!"}, ] } ] }
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -6824,7 +6788,7 @@ components:
dataset_id: dataset_id:
type: string type: string
description: >- description: >-
The ID of the dataset. If not provided, a random ID will be generated. The ID of the dataset. If not provided, an ID will be generated.
additionalProperties: false additionalProperties: false
required: required:
- purpose - purpose

View file

@ -37,8 +37,8 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used # keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore dataset_store: DatasetStore
@webmethod(route="/datasetio/rows", method="GET") @webmethod(route="/datasets/{dataset_id}/iterrows", method="GET")
async def get_rows_paginated( async def iterrows(
self, self,
dataset_id: str, dataset_id: str,
rows_in_page: int, rows_in_page: int,
@ -54,5 +54,7 @@ class DatasetIO(Protocol):
""" """
... ...
@webmethod(route="/datasetio/rows", method="POST") @webmethod(route="/datasets/{dataset_id}/rows", method="POST")
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... async def append_rows(
self, dataset_id: str, rows: List[Dict[str, Any]]
) -> None: ...

View file

@ -60,6 +60,7 @@ class DatasetPurpose(Enum):
"answer": "Paris" "answer": "Paris"
} }
""" """
post_training_messages = "post-training/messages" post_training_messages = "post-training/messages"
eval_question_answer = "eval/question-answer" eval_question_answer = "eval/question-answer"
@ -75,11 +76,10 @@ class DatasetPurpose(Enum):
class DatasetType(Enum): class DatasetType(Enum):
""" """
Type of the dataset source. Type of the dataset source.
:cvar huggingface: The dataset is stored in Huggingface. :cvar uri: The dataset can be obtained from a URI.
:cvar uri: The dataset can be obtained from a URI. :cvar rows: The dataset is stored in rows.
:cvar rows: The dataset is stored in rows.
""" """
huggingface = "huggingface"
uri = "uri" uri = "uri"
rows = "rows" rows = "rows"
@ -92,30 +92,11 @@ class URIDataSource(BaseModel):
- "lsfs://mydata.jsonl" - "lsfs://mydata.jsonl"
- "data:csv;base64,{base64_content}" - "data:csv;base64,{base64_content}"
""" """
type: Literal["uri"] = "uri" type: Literal["uri"] = "uri"
uri: str uri: str
class HuggingfaceDataSourceFields(BaseModel):
"""The fields for a Huggingface dataset.
:param path: The path to the dataset in Huggingface. E.g.
- "llamastack/simpleqa"
:param params: The parameters for the dataset.
"""
path: str
params: Dict[str, Any]
@json_schema_type
class HuggingfaceDataSource(BaseModel):
"""A dataset stored in Huggingface.
:param type: The type of the data source.
:param huggingface: The fields for a Huggingface dataset.
"""
type: Literal["huggingface"] = "huggingface"
huggingface: HuggingfaceDataSourceFields
@json_schema_type @json_schema_type
class RowsDataSource(BaseModel): class RowsDataSource(BaseModel):
"""A dataset stored in rows. """A dataset stored in rows.
@ -124,13 +105,14 @@ class RowsDataSource(BaseModel):
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]} {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
] ]
""" """
type: Literal["rows"] = "rows" type: Literal["rows"] = "rows"
rows: List[Dict[str, Any]] rows: List[Dict[str, Any]]
DataSource = register_schema( DataSource = register_schema(
Annotated[ Annotated[
Union[URIDataSource, HuggingfaceDataSource, RowsDataSource], Union[URIDataSource, RowsDataSource],
Field(discriminator="type"), Field(discriminator="type"),
], ],
name="DataSource", name="DataSource",
@ -141,6 +123,7 @@ class CommonDatasetFields(BaseModel):
""" """
Common fields for a dataset. Common fields for a dataset.
""" """
purpose: DatasetPurpose purpose: DatasetPurpose
source: DataSource source: DataSource
metadata: Dict[str, Any] = Field( metadata: Dict[str, Any] = Field(
@ -237,13 +220,12 @@ class Datasets(Protocol):
"uri": "lsfs://mydata.jsonl" "uri": "lsfs://mydata.jsonl"
} }
- { - {
"type": "huggingface", "type": "uri",
"huggingface": { "uri": "data:csv;base64,{base64_content}"
"dataset_path": "tatsu-lab/alpaca", }
"params": { - {
"split": "train" "type": "uri",
} "uri": "huggingface://llamastack/simpleqa?split=train"
}
} }
- { - {
"type": "rows", "type": "rows",
@ -258,7 +240,7 @@ class Datasets(Protocol):
} }
:param metadata: The metadata for the dataset. :param metadata: The metadata for the dataset.
- E.g. {"description": "My dataset"} - E.g. {"description": "My dataset"}
:param dataset_id: The ID of the dataset. If not provided, a random ID will be generated. :param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
""" """
... ...