update EvaluationTask

2025-03-18 19:30:01 -07:00 · 2025-03-18 19:30:01 -07:00 · d994499f09
commit d994499f09
parent f107e3229b
3 changed files with 32 additions and 5 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8527,23 +8527,28 @@
                "type": "object",
                "properties": {
                    "benchmark_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The benchmark ID to evaluate."
                    },
                    "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The dataset ID to evaluate."
                    },
                    "data_source": {
-                        "$ref": "#/components/schemas/DataSource"
+                        "$ref": "#/components/schemas/DataSource",
+                        "description": "The data source to evaluate."
                    },
                    "grader_ids": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The grader IDs to evaluate."
                    }
                },
                "additionalProperties": false,
-                "title": "EvaluationTask"
+                "title": "EvaluationTask",
+                "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
            },
            "GradeRequest": {
                "type": "object",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5908,16 +5908,26 @@ components:
      properties:
        benchmark_id:
          type: string
+          description: The benchmark ID to evaluate.
        dataset_id:
          type: string
+          description: The dataset ID to evaluate.
        data_source:
          $ref: '#/components/schemas/DataSource'
+          description: The data source to evaluate.
        grader_ids:
          type: array
          items:
            type: string
+          description: The grader IDs to evaluate.
      additionalProperties: false
      title: EvaluationTask
+      description: >-
+        A task for evaluation. To specify a task, one of the following must be provided:
+        - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id`
+        and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+        - `data_source` and `grader_ids`: Run evaluation task against a data source
+        (e.g. rows, uri, etc.) and a list of grader_ids
    GradeRequest:
      type: object
      properties:
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -49,6 +49,18 @@ EvaluationCandidate = register_schema(
@json_schema_type
 class EvaluationTask(BaseModel):
+    """
+    A task for evaluation. To specify a task, one of the following must be provided:
+    - `benchmark_id`: Run evaluation task against a benchmark_id
+    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
+    - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids
+    :param benchmark_id: The benchmark ID to evaluate.
+    :param dataset_id: The dataset ID to evaluate.
+    :param data_source: The data source to evaluate.
+    :param grader_ids: The grader IDs to evaluate.
+    """
    benchmark_id: Optional[str] = None
    dataset_id: Optional[str] = None
    data_source: Optional[DataSource] = None