From d994499f09f1d2c15741b833a594734c39ec1ff6 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 18 Mar 2025 19:30:01 -0700 Subject: [PATCH] update EvaluationTask --- docs/_static/llama-stack-spec.html | 15 ++++++++++----- docs/_static/llama-stack-spec.yaml | 10 ++++++++++ llama_stack/apis/evaluation/evaluation.py | 12 ++++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index e9429a0c0..ff53f1aed 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -8527,23 +8527,28 @@ "type": "object", "properties": { "benchmark_id": { - "type": "string" + "type": "string", + "description": "The benchmark ID to evaluate." }, "dataset_id": { - "type": "string" + "type": "string", + "description": "The dataset ID to evaluate." }, "data_source": { - "$ref": "#/components/schemas/DataSource" + "$ref": "#/components/schemas/DataSource", + "description": "The data source to evaluate." }, "grader_ids": { "type": "array", "items": { "type": "string" - } + }, + "description": "The grader IDs to evaluate." } }, "additionalProperties": false, - "title": "EvaluationTask" + "title": "EvaluationTask", + "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids" }, "GradeRequest": { "type": "object", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 8de434ba7..45546fa11 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -5908,16 +5908,26 @@ components: properties: benchmark_id: type: string + description: The benchmark ID to evaluate. dataset_id: type: string + description: The dataset ID to evaluate. data_source: $ref: '#/components/schemas/DataSource' + description: The data source to evaluate. grader_ids: type: array items: type: string + description: The grader IDs to evaluate. additionalProperties: false title: EvaluationTask + description: >- + A task for evaluation. To specify a task, one of the following must be provided: + - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` + and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids + - `data_source` and `grader_ids`: Run evaluation task against a data source + (e.g. rows, uri, etc.) and a list of grader_ids GradeRequest: type: object properties: diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py index 1911b567b..6b2190417 100644 --- a/llama_stack/apis/evaluation/evaluation.py +++ b/llama_stack/apis/evaluation/evaluation.py @@ -49,6 +49,18 @@ EvaluationCandidate = register_schema( @json_schema_type class EvaluationTask(BaseModel): + """ + A task for evaluation. To specify a task, one of the following must be provided: + - `benchmark_id`: Run evaluation task against a benchmark_id + - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids + - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids + + :param benchmark_id: The benchmark ID to evaluate. + :param dataset_id: The dataset ID to evaluate. + :param data_source: The data source to evaluate. + :param grader_ids: The grader IDs to evaluate. + """ + benchmark_id: Optional[str] = None dataset_id: Optional[str] = None data_source: Optional[DataSource] = None