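update: rename evaluation `grading` method to `grade` and `GradingRequest` schema to `GradeRequest`; regenerate OpenAPI specs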

2025-03-18 18:16:00 -07:00 · 2025-03-18 18:16:00 -07:00 · 5e817cd56a
commit 5e817cd56a
parent 398319fe7a
3 changed files with 187 additions and 187 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2035,6 +2035,49 @@
                ]
            }
        },
+        "/v1/evaluation/grading": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The evaluation job containing grader scores.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluationJob"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Evaluation"
+                ],
+                "description": "Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/GradeRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/v1/evaluation/grade_sync": {
            "post": {
                "responses": {
@@ -2078,49 +2121,6 @@
                }
            }
        },
-        "/v1/evaluation/grading": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "The evaluation job containing grader scores.",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJob"
-                                }
-                            }
-                        }
-                    },
-                    "400": {
-                        "$ref": "#/components/responses/BadRequest400"
-                    },
-                    "429": {
-                        "$ref": "#/components/responses/TooManyRequests429"
-                    },
-                    "500": {
-                        "$ref": "#/components/responses/InternalServerError500"
-                    },
-                    "default": {
-                        "$ref": "#/components/responses/DefaultError"
-                    }
-                },
-                "tags": [
-                    "Evaluation"
-                ],
-                "description": "Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.",
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/GradingRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
        "/v1/health": {
            "get": {
                "responses": {
@@ -8615,7 +8615,7 @@
                    }
                }
            },
-            "GradeSyncRequest": {
+            "GradeRequest": {
                "type": "object",
                "properties": {
                    "task": {
@@ -8627,69 +8627,7 @@
                "required": [
                    "task"
                ],
-                "title": "GradeSyncRequest"
-            },
-            "EvaluationResponse": {
-                "type": "object",
-                "properties": {
-                    "generations": {
-                        "type": "array",
-                        "items": {
-                            "type": "object",
-                            "additionalProperties": {
-                                "oneOf": [
-                                    {
-                                        "type": "null"
-                                    },
-                                    {
-                                        "type": "boolean"
-                                    },
-                                    {
-                                        "type": "number"
-                                    },
-                                    {
-                                        "type": "string"
-                                    },
-                                    {
-                                        "type": "array"
-                                    },
-                                    {
-                                        "type": "object"
-                                    }
-                                ]
-                            }
-                        },
-                        "description": "The generations in rows for the evaluation."
-                    },
-                    "scores": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "$ref": "#/components/schemas/ScoringResult"
-                        },
-                        "description": "The scores for the evaluation. Map of grader id to ScoringResult."
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "generations",
-                    "scores"
-                ],
-                "title": "EvaluationResponse",
-                "description": "A response to an inline evaluation."
-            },
-            "GradingRequest": {
-                "type": "object",
-                "properties": {
-                    "task": {
-                        "$ref": "#/components/schemas/EvaluationTask",
-                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "task"
-                ],
-                "title": "GradingRequest"
+                "title": "GradeRequest"
            },
            "EvaluationCandidate": {
                "oneOf": [
@@ -8763,6 +8701,68 @@
                ],
                "title": "EvaluationJob"
            },
+            "GradeSyncRequest": {
+                "type": "object",
+                "properties": {
+                    "task": {
+                        "$ref": "#/components/schemas/EvaluationTask",
+                        "description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "task"
+                ],
+                "title": "GradeSyncRequest"
+            },
+            "EvaluationResponse": {
+                "type": "object",
+                "properties": {
+                    "generations": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        },
+                        "description": "The generations in rows for the evaluation."
+                    },
+                    "scores": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ScoringResult"
+                        },
+                        "description": "The scores for the evaluation. Map of grader id to ScoringResult."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "generations",
+                    "scores"
+                ],
+                "title": "EvaluationResponse",
+                "description": "A response to an inline evaluation."
+            },
            "HealthInfo": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1385,6 +1385,38 @@ paths:
          required: true
          schema:
            type: string
+  /v1/evaluation/grading:
+    post:
+      responses:
+        '200':
+          description: >-
+            The evaluation job containing grader scores.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluationJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Evaluation
+      description: >-
+        Schedule a grading job, by grading generated (model or agent) results. The
+        generated results are expected to be in the dataset.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GradeRequest'
+        required: true
  /v1/evaluation/grade_sync:
    post:
      responses:
@@ -1420,38 +1452,6 @@ paths:
            schema:
              $ref: '#/components/schemas/GradeSyncRequest'
        required: true
-  /v1/evaluation/grading:
-    post:
-      responses:
-        '200':
-          description: >-
-            The evaluation job containing grader scores.
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EvaluationJob'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Evaluation
-      description: >-
-        Schedule a grading job, by grading generated results. The generated results
-        are expected to be in the dataset.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/GradingRequest'
-        required: true
  /v1/health:
    get:
      responses:
@@ -5966,7 +5966,7 @@ components:
          benchmark: '#/components/schemas/BenchmarkEvaluationTask'
          dataset: '#/components/schemas/DatasetEvaluationTask'
          data: '#/components/schemas/DataEvaluationTask'
-    GradeSyncRequest:
+    GradeRequest:
      type: object
      properties:
        task:
@@ -5980,51 +5980,7 @@ components:
      additionalProperties: false
      required:
        - task
-      title: GradeSyncRequest
-    EvaluationResponse:
-      type: object
-      properties:
-        generations:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            The generations in rows for the evaluation.
-        scores:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            The scores for the evaluation. Map of grader id to ScoringResult.
-      additionalProperties: false
-      required:
-        - generations
-        - scores
-      title: EvaluationResponse
-      description: A response to an inline evaluation.
-    GradingRequest:
-      type: object
-      properties:
-        task:
-          $ref: '#/components/schemas/EvaluationTask'
-          description: >-
-            The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
-            task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
-            against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
-            evaluation task against a data source (e.g. rows, uri, etc.) and a list
-            of grader_ids
-      additionalProperties: false
-      required:
-        - task
-      title: GradingRequest
+      title: GradeRequest
    EvaluationCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
@@ -6078,6 +6034,50 @@ components:
        - task
        - candidate
      title: EvaluationJob
+    GradeSyncRequest:
+      type: object
+      properties:
+        task:
+          $ref: '#/components/schemas/EvaluationTask'
+          description: >-
+            The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
+            task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
+            against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
+            evaluation task against a data source (e.g. rows, uri, etc.) and a list
+            of grader_ids
+      additionalProperties: false
+      required:
+        - task
+      title: GradeSyncRequest
+    EvaluationResponse:
+      type: object
+      properties:
+        generations:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+          description: >-
+            The generations in rows for the evaluation.
+        scores:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          description: >-
+            The scores for the evaluation. Map of grader id to ScoringResult.
+      additionalProperties: false
+      required:
+        - generations
+        - scores
+      title: EvaluationResponse
+      description: A response to an inline evaluation.
    HealthInfo:
      type: object
      properties:
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -148,9 +148,9 @@ class Evaluation(Protocol):
        ...

    @webmethod(route="/evaluation/grading", method="POST")
-    async def grading(self, task: EvaluationTask) -> EvaluationJob:
+    async def grade(self, task: EvaluationTask) -> EvaluationJob:
        """
-        Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.
+        Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.

        :param task: The task to evaluate. One of:
         - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id