update EvaluationTask

2025-03-18 19:28:34 -07:00 · 2025-03-18 19:28:34 -07:00 · f107e3229b
commit f107e3229b
parent 5e817cd56a
3 changed files with 56 additions and 195 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2035,7 +2035,7 @@
                ]
            }
        },
-        "/v1/evaluation/grading": {
+        "/v1/evaluation/grade": {
            "post": {
                "responses": {
                    "200": {
@@ -8523,32 +8523,14 @@
                ],
                "title": "VectorDB"
            },
-            "BenchmarkEvaluationTask": {
+            "EvaluationTask": {
                "type": "object",
                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "benchmark",
-                        "default": "benchmark"
-                    },
                    "benchmark_id": {
                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "type",
-                    "benchmark_id"
-                ],
-                "title": "BenchmarkEvaluationTask"
-            },
-            "DataEvaluationTask": {
-                "type": "object",
-                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "data",
-                        "default": "data"
+                    },
+                    "dataset_id": {
+                        "type": "string"
                    },
                    "data_source": {
                        "$ref": "#/components/schemas/DataSource"
@@ -8561,66 +8543,14 @@
                    }
                },
                "additionalProperties": false,
-                "required": [
-                    "type",
-                    "data_source",
-                    "grader_ids"
-                ],
-                "title": "DataEvaluationTask"
-            },
-            "DatasetEvaluationTask": {
-                "type": "object",
-                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "dataset",
-                        "default": "dataset"
-                    },
-                    "dataset_id": {
-                        "type": "string"
-                    },
-                    "grader_ids": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "type",
-                    "dataset_id",
-                    "grader_ids"
-                ],
-                "title": "DatasetEvaluationTask"
-            },
-            "EvaluationTask": {
-                "oneOf": [
-                    {
-                        "$ref": "#/components/schemas/BenchmarkEvaluationTask"
-                    },
-                    {
-                        "$ref": "#/components/schemas/DatasetEvaluationTask"
-                    },
-                    {
-                        "$ref": "#/components/schemas/DataEvaluationTask"
-                    }
-                ],
-                "discriminator": {
-                    "propertyName": "type",
-                    "mapping": {
-                        "benchmark": "#/components/schemas/BenchmarkEvaluationTask",
-                        "dataset": "#/components/schemas/DatasetEvaluationTask",
-                        "data": "#/components/schemas/DataEvaluationTask"
-                    }
-                }
+                "title": "EvaluationTask"
            },
            "GradeRequest": {
                "type": "object",
                "properties": {
                    "task": {
                        "$ref": "#/components/schemas/EvaluationTask",
-                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+                        "description": "The task to evaluate. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
                    }
                },
                "additionalProperties": false,
@@ -8706,7 +8636,7 @@
                "properties": {
                    "task": {
                        "$ref": "#/components/schemas/EvaluationTask",
-                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+                        "description": "The task to evaluate. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
                    }
                },
                "additionalProperties": false,
@@ -10737,7 +10667,7 @@
                "properties": {
                    "task": {
                        "$ref": "#/components/schemas/EvaluationTask",
-                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+                        "description": "The task to evaluate. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
                    },
                    "candidate": {
                        "$ref": "#/components/schemas/EvaluationCandidate",
@@ -10839,7 +10769,7 @@
                "properties": {
                    "task": {
                        "$ref": "#/components/schemas/EvaluationTask",
-                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+                        "description": "The task to evaluate. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
                    },
                    "candidate": {
                        "$ref": "#/components/schemas/EvaluationCandidate",