update EvaluationTask

This commit is contained in:
Xi Yan 2025-03-18 19:30:01 -07:00
parent f107e3229b
commit d994499f09
3 changed files with 32 additions and 5 deletions

View file

@@ -8527,23 +8527,28 @@
"type": "object", "type": "object",
"properties": { "properties": {
"benchmark_id": { "benchmark_id": {
"type": "string" "type": "string",
"description": "The benchmark ID to evaluate."
}, },
"dataset_id": { "dataset_id": {
"type": "string" "type": "string",
"description": "The dataset ID to evaluate."
}, },
"data_source": { "data_source": {
"$ref": "#/components/schemas/DataSource" "$ref": "#/components/schemas/DataSource",
"description": "The data source to evaluate."
}, },
"grader_ids": { "grader_ids": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The grader IDs to evaluate."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"title": "EvaluationTask" "title": "EvaluationTask",
"description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}, },
"GradeRequest": { "GradeRequest": {
"type": "object", "type": "object",

View file

@@ -5908,16 +5908,26 @@ components:
properties: properties:
benchmark_id: benchmark_id:
type: string type: string
description: The benchmark ID to evaluate.
dataset_id: dataset_id:
type: string type: string
description: The dataset ID to evaluate.
data_source: data_source:
$ref: '#/components/schemas/DataSource' $ref: '#/components/schemas/DataSource'
description: The data source to evaluate.
grader_ids: grader_ids:
type: array type: array
items: items:
type: string type: string
description: The grader IDs to evaluate.
additionalProperties: false additionalProperties: false
title: EvaluationTask title: EvaluationTask
description: >-
A task for evaluation. To specify a task, one of the following must be provided:
- `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id`
and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
- `data_source` and `grader_ids`: Run evaluation task against a data source
(e.g. rows, uri, etc.) and a list of grader_ids
GradeRequest: GradeRequest:
type: object type: object
properties: properties:

View file

@@ -49,6 +49,18 @@ EvaluationCandidate = register_schema(
@json_schema_type @json_schema_type
class EvaluationTask(BaseModel): class EvaluationTask(BaseModel):
"""
A task for evaluation. To specify a task, one of the following must be provided:
- `benchmark_id`: Run evaluation task against a benchmark_id
- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids
:param benchmark_id: The benchmark ID to evaluate.
:param dataset_id: The dataset ID to evaluate.
:param data_source: The data source to evaluate.
:param grader_ids: The grader IDs to evaluate.
"""
benchmark_id: Optional[str] = None benchmark_id: Optional[str] = None
dataset_id: Optional[str] = None dataset_id: Optional[str] = None
data_source: Optional[DataSource] = None data_source: Optional[DataSource] = None