diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index 6b2190417..51f5c371c 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -51,7 +51,7 @@ EvaluationCandidate = register_schema(
 class EvaluationTask(BaseModel):
     """
     A task for evaluation. To specify a task, one of the following must be provided:
-    - `benchmark_id`: Run evaluation task against a benchmark_id
+    - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
     - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
     - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids