result_data in evaluation response

This commit is contained in:
Xi Yan 2025-03-18 22:09:35 -07:00
parent 08c0c5505e
commit a92756a4b7
3 changed files with 43 additions and 32 deletions

View file

@ -8653,7 +8653,7 @@
"EvaluationResponse": {
"type": "object",
"properties": {
"generations": {
"result_data": {
"type": "array",
"items": {
"type": "object",
@ -8680,20 +8680,39 @@
]
}
},
"description": "The generations in rows for the evaluation."
"description": "The result data containing generations and grades in each row."
},
"scores": {
"metrics": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
"oneOf": [
{
"type": "null"
},
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "Map of metric name to aggregated value."
}
},
"additionalProperties": false,
"required": [
"generations",
"scores"
"result_data",
"metrics"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."

View file

@ -6018,7 +6018,7 @@ components:
EvaluationResponse:
type: object
properties:
generations:
result_data:
type: array
items:
type: object
@ -6031,17 +6031,22 @@ components:
- type: array
- type: object
description: >-
The generations in rows for the evaluation.
scores:
The result data containing generations and grades in each row.
metrics:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
The scores for the evaluation. Map of grader id to ScoringResult.
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Map of metric name to aggregated value.
additionalProperties: false
required:
- generations
- scores
- result_data
- metrics
title: EvaluationResponse
description: A response to an inline evaluation.
HealthInfo:

View file

@ -76,30 +76,17 @@ class EvaluationJob(CommonJobFields):
candidate: EvaluationCandidate
@json_schema_type
class ScoringResult(BaseModel):
"""
A scoring result for a single row.
:param scores: The scoring result for each row. Each row is a map of grader column name to value.
:param metrics: Map of metric name to aggregated value.
"""
scores: List[Dict[str, Any]]
metrics: Dict[str, Any]
@json_schema_type
class EvaluationResponse(BaseModel):
"""
A response to an inline evaluation.
:param generations: The generations in rows for the evaluation.
:param scores: The scores for the evaluation. Map of grader id to ScoringResult.
:param result_data: The result data containing generations and grades in each row.
:param metrics: Map of metric name to aggregated value.
"""
generations: List[Dict[str, Any]]
scores: Dict[str, ScoringResult]
result_data: List[Dict[str, Any]]
metrics: Dict[str, Any]
class Evaluation(Protocol):