diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index e9429a0c0..ff53f1aed 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8527,23 +8527,28 @@
"type": "object",
"properties": {
"benchmark_id": {
- "type": "string"
+ "type": "string",
+ "description": "The benchmark ID to evaluate."
},
"dataset_id": {
- "type": "string"
+ "type": "string",
+ "description": "The dataset ID to evaluate."
},
"data_source": {
- "$ref": "#/components/schemas/DataSource"
+ "$ref": "#/components/schemas/DataSource",
+ "description": "The data source to evaluate."
},
"grader_ids": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The IDs of the graders to use for the evaluation."
}
},
"additionalProperties": false,
- "title": "EvaluationTask"
+ "title": "EvaluationTask",
+ "description": "A task for evaluation. To specify a task, one of the following must be provided: (1) `benchmark_id`: run the evaluation task against a benchmark_id; (2) `dataset_id` and `grader_ids`: run the evaluation task against a dataset_id and a list of grader_ids; (3) `data_source` and `grader_ids`: run the evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids."
},
"GradeRequest": {
"type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 8de434ba7..45546fa11 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5908,16 +5908,26 @@ components:
properties:
benchmark_id:
type: string
+ description: The benchmark ID to evaluate.
dataset_id:
type: string
+ description: The dataset ID to evaluate.
data_source:
$ref: '#/components/schemas/DataSource'
+ description: The data source to evaluate.
grader_ids:
type: array
items:
type: string
+ description: The IDs of the graders to use for the evaluation.
additionalProperties: false
title: EvaluationTask
+ description: >-
+ A task for evaluation. To specify a task, one of the following must be provided:
+ - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id`
+ and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+ - `data_source` and `grader_ids`: Run evaluation task against a data source
+ (e.g. rows, uri, etc.) and a list of grader_ids
GradeRequest:
type: object
properties:
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index 1911b567b..6b2190417 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -49,6 +49,18 @@ EvaluationCandidate = register_schema(
@json_schema_type
class EvaluationTask(BaseModel):
+ """
+ A task for evaluation. To specify a task, one of the following must be provided:
+ - `benchmark_id`: Run evaluation task against a benchmark_id
+ - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+ - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids
+
+ :param benchmark_id: The benchmark ID to evaluate.
+ :param dataset_id: The dataset ID to evaluate.
+ :param data_source: The data source to evaluate.
+ :param grader_ids: The grader IDs to evaluate.
+ """
+
benchmark_id: Optional[str] = None
dataset_id: Optional[str] = None
data_source: Optional[DataSource] = None