This commit is contained in:
Xi Yan 2025-03-18 18:12:06 -07:00
parent b98497ee56
commit 238cdc4e69
2 changed files with 221 additions and 207 deletions

View file

@ -2035,49 +2035,6 @@
]
}
},
"/v1/evaluation/grade": {
"post": {
"responses": {
"200": {
"description": "The evaluation job containing grader scores.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluationJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Evaluation"
],
"description": "Run a grading job with generated results. Use this when you have generated results from inference in a dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GradeRequest"
}
}
},
"required": true
}
}
},
"/v1/evaluation/grade_sync": {
"post": {
"responses": {
@ -2107,7 +2064,7 @@
"tags": [
"Evaluation"
],
"description": "Run a grading job with generated results inline.",
"description": "Run grading synchronously on generated results, i.e., without scheduling a job. You should use this for quick testing, or when the number of rows is limited. Some implementations may have stricter restrictions on inputs which will be accepted.",
"parameters": [],
"requestBody": {
"content": {
@ -2121,6 +2078,49 @@
}
}
},
"/v1/evaluation/grading": {
"post": {
"responses": {
"200": {
"description": "The evaluation job containing grader scores.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluationJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Evaluation"
],
"description": "Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GradingRequest"
}
}
},
"required": true
}
}
},
"/v1/health": {
"get": {
"responses": {
@ -2622,7 +2622,7 @@
"tags": [
"Benchmarks"
],
"description": "Register a new benchmark.",
"description": "Register a new benchmark. A benchmark consists of a dataset id and a list of grader ids.",
"parameters": [],
"requestBody": {
"content": {
@ -3730,7 +3730,7 @@
"tags": [
"Evaluation"
],
"description": "Run an evaluation job.",
"description": "Schedule a full evaluation job by generating results using the candidate and then grading them.",
"parameters": [],
"requestBody": {
"content": {
@ -3869,7 +3869,7 @@
"tags": [
"Evaluation"
],
"description": "Run an evaluation job inline.",
"description": "Run an evaluation synchronously, i.e., without scheduling a job. You should use this for quick testing, or when the number of rows is limited. Some implementations may have stricter restrictions on inputs which will be accepted.",
"parameters": [],
"requestBody": {
"content": {
@ -8615,19 +8615,81 @@
}
}
},
"GradeRequest": {
"GradeSyncRequest": {
"type": "object",
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}
},
"additionalProperties": false,
"required": [
"task"
],
"title": "GradeRequest"
"title": "GradeSyncRequest"
},
"EvaluationResponse": {
"type": "object",
"properties": {
"generations": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The generations in rows for the evaluation."
},
"scores": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
},
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
}
},
"additionalProperties": false,
"required": [
"generations",
"scores"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
},
"GradingRequest": {
"type": "object",
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}
},
"additionalProperties": false,
"required": [
"task"
],
"title": "GradingRequest"
},
"EvaluationCandidate": {
"oneOf": [
@ -8701,68 +8763,6 @@
],
"title": "EvaluationJob"
},
"GradeSyncRequest": {
"type": "object",
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}
},
"additionalProperties": false,
"required": [
"task"
],
"title": "GradeSyncRequest"
},
"EvaluationResponse": {
"type": "object",
"properties": {
"generations": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The generations in rows for the evaluation."
},
"scores": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
},
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
}
},
"additionalProperties": false,
"required": [
"generations",
"scores"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
},
"HealthInfo": {
"type": "object",
"properties": {
@ -10737,7 +10737,7 @@
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
},
"candidate": {
"$ref": "#/components/schemas/EvaluationCandidate",
@ -10839,7 +10839,7 @@
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
},
"candidate": {
"$ref": "#/components/schemas/EvaluationCandidate",