diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2a294ea11..15b06257f 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -8653,7 +8653,7 @@ "EvaluationResponse": { "type": "object", "properties": { - "result_data": { + "result_rows": { "type": "array", "items": { "type": "object", @@ -8680,9 +8680,9 @@ ] } }, - "description": "The result data containing generations and grades in each row." + "description": "The result data containing inputs, generations and grades in each row." }, - "metrics": { + "grades": { "type": "object", "additionalProperties": { "oneOf": [ @@ -8706,13 +8706,13 @@ } ] }, - "description": "Map of metric name to aggregated value." + "description": "Map of grader id to aggregated value." } }, "additionalProperties": false, "required": [ - "result_data", - "metrics" + "result_rows", + "grades" ], "title": "EvaluationResponse", "description": "A response to an inline evaluation." diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 7508acd66..0f83dd3d7 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -6018,7 +6018,7 @@ components: EvaluationResponse: type: object properties: - result_data: + result_rows: type: array items: type: object @@ -6031,8 +6031,8 @@ components: - type: array - type: object description: >- - The result data containing generations and grades in each row. - metrics: + The result data containing inputs, generations and grades in each row. + grades: type: object additionalProperties: oneOf: @@ -6042,11 +6042,11 @@ components: - type: string - type: array - type: object - description: Map of metric name to aggregated value. + description: Map of grader id to aggregated value. additionalProperties: false required: - - result_data - - metrics + - result_rows + - grades title: EvaluationResponse description: A response to an inline evaluation. HealthInfo: diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py index 8d6fdd201..bde27e0be 100644 --- a/llama_stack/apis/evaluation/evaluation.py +++ b/llama_stack/apis/evaluation/evaluation.py @@ -81,12 +81,12 @@ class EvaluationResponse(BaseModel): """ A response to an inline evaluation. - :param result_data: The result data containing generations and grades in each row. - :param metrics: Map of metric name to aggregated value. + :param result_rows: The result data containing inputs, generations and grades in each row. + :param grades: Map of grader id to aggregated value. """ - result_data: List[Dict[str, Any]] - metrics: Dict[str, Any] + result_rows: List[Dict[str, Any]] + grades: Dict[str, Any] class Evaluation(Protocol):