diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2a294ea11..15b06257f 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8653,7 +8653,7 @@
"EvaluationResponse": {
"type": "object",
"properties": {
- "result_data": {
+ "result_rows": {
"type": "array",
"items": {
"type": "object",
@@ -8680,9 +8680,9 @@
]
}
},
- "description": "The result data containing generations and grades in each row."
+ "description": "The result data containing inputs, generations and grades in each row."
},
- "metrics": {
+ "grades": {
"type": "object",
"additionalProperties": {
"oneOf": [
@@ -8706,13 +8706,13 @@
}
]
},
- "description": "Map of metric name to aggregated value."
+ "description": "Map of grader id to aggregated value."
}
},
"additionalProperties": false,
"required": [
- "result_data",
- "metrics"
+ "result_rows",
+ "grades"
],
"title": "EvaluationResponse",
"description": "A response to an inline evaluation."
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 7508acd66..0f83dd3d7 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -6018,7 +6018,7 @@ components:
EvaluationResponse:
type: object
properties:
- result_data:
+ result_rows:
type: array
items:
type: object
@@ -6031,8 +6031,8 @@ components:
- type: array
- type: object
description: >-
- The result data containing generations and grades in each row.
- metrics:
+ The result data containing inputs, generations and grades in each row.
+ grades:
type: object
additionalProperties:
oneOf:
@@ -6042,11 +6042,11 @@ components:
- type: string
- type: array
- type: object
- description: Map of metric name to aggregated value.
+ description: Map of grader id to aggregated value.
additionalProperties: false
required:
- - result_data
- - metrics
+ - result_rows
+ - grades
title: EvaluationResponse
description: A response to an inline evaluation.
HealthInfo:
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index 8d6fdd201..bde27e0be 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -81,12 +81,12 @@ class EvaluationResponse(BaseModel):
"""
A response to an inline evaluation.
- :param result_data: The result data containing generations and grades in each row.
- :param metrics: Map of metric name to aggregated value.
+ :param result_rows: The result data containing inputs, generations and grades in each row.
+ :param grades: Map of grader id to aggregated value.
"""
- result_data: List[Dict[str, Any]]
- metrics: Dict[str, Any]
+ result_rows: List[Dict[str, Any]]
+ grades: Dict[str, Any]
class Evaluation(Protocol):