forked from phoenix-oss/llama-stack-mirror
result_data in evaluation response
This commit is contained in:
parent
08c0c5505e
commit
a92756a4b7
3 changed files with 43 additions and 32 deletions
33
docs/_static/llama-stack-spec.html
vendored
33
docs/_static/llama-stack-spec.html
vendored
|
@ -8653,7 +8653,7 @@
|
|||
"EvaluationResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"generations": {
|
||||
"result_data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
|
@ -8680,20 +8680,39 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"description": "The generations in rows for the evaluation."
|
||||
"description": "The result data containing generations and grades in each row."
|
||||
},
|
||||
"scores": {
|
||||
"metrics": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
|
||||
"description": "Map of metric name to aggregated value."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"generations",
|
||||
"scores"
|
||||
"result_data",
|
||||
"metrics"
|
||||
],
|
||||
"title": "EvaluationResponse",
|
||||
"description": "A response to an inline evaluation."
|
||||
|
|
21
docs/_static/llama-stack-spec.yaml
vendored
21
docs/_static/llama-stack-spec.yaml
vendored
|
@ -6018,7 +6018,7 @@ components:
|
|||
EvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
generations:
|
||||
result_data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
|
@ -6031,17 +6031,22 @@ components:
|
|||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The generations in rows for the evaluation.
|
||||
scores:
|
||||
The result data containing generations and grades in each row.
|
||||
metrics:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
The scores for the evaluation. Map of grader id to ScoringResult.
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Map of metric name to aggregated value.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
- result_data
|
||||
- metrics
|
||||
title: EvaluationResponse
|
||||
description: A response to an inline evaluation.
|
||||
HealthInfo:
|
||||
|
|
|
@ -76,30 +76,17 @@ class EvaluationJob(CommonJobFields):
|
|||
candidate: EvaluationCandidate
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ScoringResult(BaseModel):
|
||||
"""
|
||||
A scoring result for a single row.
|
||||
|
||||
:param scores: The scoring result for each row. Each row is a map of grader column name to value.
|
||||
:param metrics: Map of metric name to aggregated value.
|
||||
"""
|
||||
|
||||
scores: List[Dict[str, Any]]
|
||||
metrics: Dict[str, Any]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class EvaluationResponse(BaseModel):
|
||||
"""
|
||||
A response to an inline evaluation.
|
||||
|
||||
:param generations: The generations in rows for the evaluation.
|
||||
:param scores: The scores for the evaluation. Map of grader id to ScoringResult.
|
||||
:param result_data: The result data containing generations and grades in each row.
|
||||
:param metrics: Map of metric name to aggregated value.
|
||||
"""
|
||||
|
||||
generations: List[Dict[str, Any]]
|
||||
scores: Dict[str, ScoringResult]
|
||||
result_data: List[Dict[str, Any]]
|
||||
metrics: Dict[str, Any]
|
||||
|
||||
|
||||
class Evaluation(Protocol):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue