This commit is contained in:
Xi Yan 2025-03-18 18:16:00 -07:00
parent 398319fe7a
commit 5e817cd56a
3 changed files with 187 additions and 187 deletions

View file

@ -2035,6 +2035,49 @@
]
}
},
"/v1/evaluation/grading": {
"post": {
"responses": {
"200": {
"description": "The evaluation job containing grader scores.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluationJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Evaluation"
],
"description": "Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GradeRequest"
}
}
},
"required": true
}
}
},
"/v1/evaluation/grade_sync": {
"post": {
"responses": {
@ -2078,49 +2121,6 @@
}
}
},
"/v1/evaluation/grading": {
"post": {
"responses": {
"200": {
"description": "The evaluation job containing grader scores.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/EvaluationJob"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Evaluation"
],
"description": "Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GradingRequest"
}
}
},
"required": true
}
}
},
"/v1/health": {
"get": {
"responses": {
@ -8615,7 +8615,7 @@
}
}
},
"GradeSyncRequest": {
"GradeRequest": {
"type": "object",
"properties": {
"task": {
@ -8627,69 +8627,7 @@
"required": [
"task"
],
"title": "GradeSyncRequest"
},
"EvaluationResponse": {
"type": "object",
"properties": {
"generations": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The generations in rows for the evaluation."
},
"scores": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
},
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
}
},
"additionalProperties": false,
"required": [
"generations",
"scores"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
},
"GradingRequest": {
"type": "object",
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}
},
"additionalProperties": false,
"required": [
"task"
],
"title": "GradingRequest"
"title": "GradeRequest"
},
"EvaluationCandidate": {
"oneOf": [
@ -8763,6 +8701,68 @@
],
"title": "EvaluationJob"
},
"GradeSyncRequest": {
"type": "object",
"properties": {
"task": {
"$ref": "#/components/schemas/EvaluationTask",
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
}
},
"additionalProperties": false,
"required": [
"task"
],
"title": "GradeSyncRequest"
},
"EvaluationResponse": {
"type": "object",
"properties": {
"generations": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "The generations in rows for the evaluation."
},
"scores": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
},
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
}
},
"additionalProperties": false,
"required": [
"generations",
"scores"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
},
"HealthInfo": {
"type": "object",
"properties": {

View file

@ -1385,6 +1385,38 @@ paths:
required: true
schema:
type: string
/v1/evaluation/grading:
post:
responses:
'200':
description: >-
The evaluation job containing grader scores.
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluationJob'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Evaluation
description: >-
Schedule a grading job, by grading generated (model or agent) results. The
generated results are expected to be in the dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GradeRequest'
required: true
/v1/evaluation/grade_sync:
post:
responses:
@ -1420,38 +1452,6 @@ paths:
schema:
$ref: '#/components/schemas/GradeSyncRequest'
required: true
/v1/evaluation/grading:
post:
responses:
'200':
description: >-
The evaluation job containing grader scores.
content:
application/json:
schema:
$ref: '#/components/schemas/EvaluationJob'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Evaluation
description: >-
Schedule a grading job, by grading generated results. The generated results
are expected to be in the dataset.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GradingRequest'
required: true
/v1/health:
get:
responses:
@ -5966,7 +5966,7 @@ components:
benchmark: '#/components/schemas/BenchmarkEvaluationTask'
dataset: '#/components/schemas/DatasetEvaluationTask'
data: '#/components/schemas/DataEvaluationTask'
GradeSyncRequest:
GradeRequest:
type: object
properties:
task:
@ -5980,51 +5980,7 @@ components:
additionalProperties: false
required:
- task
title: GradeSyncRequest
EvaluationResponse:
type: object
properties:
generations:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The generations in rows for the evaluation.
scores:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
The scores for the evaluation. Map of grader id to ScoringResult.
additionalProperties: false
required:
- generations
- scores
title: EvaluationResponse
description: A response to an inline evaluation.
GradingRequest:
type: object
properties:
task:
$ref: '#/components/schemas/EvaluationTask'
description: >-
The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
evaluation task against a data source (e.g. rows, uri, etc.) and a list
of grader_ids
additionalProperties: false
required:
- task
title: GradingRequest
title: GradeRequest
EvaluationCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
@ -6078,6 +6034,50 @@ components:
- task
- candidate
title: EvaluationJob
GradeSyncRequest:
type: object
properties:
task:
$ref: '#/components/schemas/EvaluationTask'
description: >-
The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
evaluation task against a data source (e.g. rows, uri, etc.) and a list
of grader_ids
additionalProperties: false
required:
- task
title: GradeSyncRequest
EvaluationResponse:
type: object
properties:
generations:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The generations in rows for the evaluation.
scores:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
The scores for the evaluation. Map of grader id to ScoringResult.
additionalProperties: false
required:
- generations
- scores
title: EvaluationResponse
description: A response to an inline evaluation.
HealthInfo:
type: object
properties:

View file

@ -148,9 +148,9 @@ class Evaluation(Protocol):
...
@webmethod(route="/evaluation/grading", method="POST")
async def grading(self, task: EvaluationTask) -> EvaluationJob:
async def grade(self, task: EvaluationTask) -> EvaluationJob:
"""
Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.
Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.
:param task: The task to evaluate. One of:
- BenchmarkEvaluationTask: Run evaluation task against a benchmark_id