From d994499f09f1d2c15741b833a594734c39ec1ff6 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Tue, 18 Mar 2025 19:30:01 -0700
Subject: [PATCH] docs: add descriptions for EvaluationTask and its fields

---
 docs/_static/llama-stack-spec.html        | 15 ++++++++++-----
 docs/_static/llama-stack-spec.yaml        | 10 ++++++++++
 llama_stack/apis/evaluation/evaluation.py | 12 ++++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index e9429a0c0..ff53f1aed 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8527,23 +8527,28 @@
                 "type": "object",
                 "properties": {
                     "benchmark_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The benchmark ID to evaluate."
                     },
                     "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The dataset ID to evaluate."
                     },
                     "data_source": {
-                        "$ref": "#/components/schemas/DataSource"
+                        "$ref": "#/components/schemas/DataSource",
+                        "description": "The data source to evaluate."
                     },
                     "grader_ids": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The grader IDs to evaluate."
                     }
                 },
                 "additionalProperties": false,
-                "title": "EvaluationTask"
+                "title": "EvaluationTask",
+                "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
             },
             "GradeRequest": {
                 "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 8de434ba7..45546fa11 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5908,16 +5908,26 @@ components:
       properties:
         benchmark_id:
           type: string
+          description: The benchmark ID to evaluate.
         dataset_id:
           type: string
+          description: The dataset ID to evaluate.
         data_source:
           $ref: '#/components/schemas/DataSource'
+          description: The data source to evaluate.
         grader_ids:
           type: array
           items:
             type: string
+          description: The grader IDs to evaluate.
       additionalProperties: false
       title: EvaluationTask
+      description: >-
+        A task for evaluation. To specify a task, one of the following must be provided:
+        - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id`
+        and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+        - `data_source` and `grader_ids`: Run evaluation task against a data source
+        (e.g. rows, uri, etc.) and a list of grader_ids
     GradeRequest:
       type: object
       properties:
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index 1911b567b..6b2190417 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -49,6 +49,18 @@ EvaluationCandidate = register_schema(
 
 @json_schema_type
 class EvaluationTask(BaseModel):
+    """
+    A task for evaluation. To specify a task, one of the following must be provided:
+    - `benchmark_id`: Run evaluation task against a benchmark_id
+    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+    - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids
+
+    :param benchmark_id: The benchmark ID to evaluate.
+    :param dataset_id: The dataset ID to evaluate.
+    :param data_source: The data source to evaluate.
+    :param grader_ids: The grader IDs to evaluate.
+    """
+
     benchmark_id: Optional[str] = None
     dataset_id: Optional[str] = None
     data_source: Optional[DataSource] = None