diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 7dff3d60e..04a5a55d5 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -41,7 +41,7 @@ class BenchmarkEvalTaskConfig(BaseModel): type: Literal["benchmark"] = "benchmark" eval_candidate: EvalCandidate num_examples: Optional[int] = Field( - description="Number of examples to evaluate (useful for quick debugging), if not provided, all examples in the dataset will be evaluated", + description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated", default=None, ) @@ -55,7 +55,7 @@ class AppEvalTaskConfig(BaseModel): default_factory=dict, ) num_examples: Optional[int] = Field( - description="Number of examples to evaluate (useful for quick debugging), if not provided, all examples in the dataset will be evaluated", + description="Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated", default=None, ) # we could optinally add any specific dataset config here