From 775e8514b70036788b17f4c808f6ac90ae1ee6b7 Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Thu, 13 Mar 2025 11:47:42 -0700
Subject: [PATCH] Rename eval/scoring job routes to /jobs and type jobs with JobType

---
 docs/_static/llama-stack-spec.html  | 58 ++++++++++++++++++++---------
 docs/_static/llama-stack-spec.yaml  | 36 ++++++++++++++----
 llama_stack/apis/eval/eval.py       | 14 ++++++-
 llama_stack/apis/scoring/scoring.py | 12 +++++-
 4 files changed, 92 insertions(+), 28 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 48a433495..58d3c918a 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,7 +230,7 @@
                 }
             }
         },
-        "/v1/eval/job/{job_id}/cancel": {
+        "/v1/eval/jobs/{job_id}/cancel": {
             "post": {
                 "responses": {
                     "200": {
@@ -280,7 +280,7 @@
                 ]
             }
         },
-        "/v1/scoring/job/{job_id}/cancel": {
+        "/v1/scoring/jobs/{job_id}/cancel": {
             "post": {
                 "responses": {
                     "200": {
@@ -923,7 +923,7 @@
                 ]
             }
         },
-        "/v1/eval/job/{job_id}": {
+        "/v1/eval/jobs/{job_id}": {
             "get": {
                 "responses": {
                     "200": {
@@ -1123,7 +1123,7 @@
                 ]
             }
         },
-        "/v1/scoring/job/{job_id}": {
+        "/v1/scoring/jobs/{job_id}": {
             "get": {
                 "responses": {
                     "200": {
@@ -5160,26 +5160,36 @@
                     },
                     "type": {
                         "type": "string",
-                        "const": "eval",
-                        "default": "eval"
+                        "enum": [
+                            "batch_inference",
+                            "scoring",
+                            "evaluation",
+                            "post_training"
+                        ],
+                        "default": "evaluation",
+                        "description": "The type of the job."
                     },
                     "result_files": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The file ids of the eval results."
                     },
                     "result_datasets": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The ids of the datasets containing the eval results."
                     },
                     "benchmark_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The id of the benchmark to evaluate on."
                     },
                     "candidate": {
-                        "$ref": "#/components/schemas/EvalCandidate"
+                        "$ref": "#/components/schemas/EvalCandidate",
+                        "description": "The candidate to evaluate on."
                     }
                 },
                 "additionalProperties": false,
@@ -5193,7 +5203,8 @@
                     "benchmark_id",
                     "candidate"
                 ],
-                "title": "EvalJob"
+                "title": "EvalJob",
+                "description": "An evaluation job."
             },
             "ModelCandidate": {
                 "type": "object",
@@ -5399,29 +5410,39 @@
                     },
                     "type": {
                         "type": "string",
-                        "const": "scoring",
-                        "default": "scoring"
+                        "enum": [
+                            "batch_inference",
+                            "scoring",
+                            "evaluation",
+                            "post_training"
+                        ],
+                        "default": "scoring",
+                        "description": "The type of the job."
                     },
                     "result_files": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The file ids of the scoring results."
                     },
                     "result_datasets": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The ids of the datasets containing the scoring results."
                     },
                     "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The id of the dataset used for scoring."
                     },
                     "scoring_fn_ids": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "The ids of the scoring functions used."
                     }
                 },
                 "additionalProperties": false,
@@ -5435,7 +5456,8 @@
                     "dataset_id",
                     "scoring_fn_ids"
                 ],
-                "title": "ScoringJob"
+                "title": "ScoringJob",
+                "description": "A scoring job."
             },
             "CancelTrainingJobRequest": {
                 "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 45058fbdc..8220cf5e7 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,7 +142,7 @@ paths:
             schema:
               $ref: '#/components/schemas/BatchCompletionRequest'
         required: true
-  /v1/eval/job/{job_id}/cancel:
+  /v1/eval/jobs/{job_id}/cancel:
     post:
       responses:
         '200':
@@ -173,7 +173,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/scoring/job/{job_id}/cancel:
+  /v1/scoring/jobs/{job_id}/cancel:
     post:
       responses:
         '200':
@@ -622,7 +622,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/eval/job/{job_id}:
+  /v1/eval/jobs/{job_id}:
     get:
       responses:
         '200':
@@ -756,7 +756,7 @@ paths:
           required: true
           schema:
             type: string
-  /v1/scoring/job/{job_id}:
+  /v1/scoring/jobs/{job_id}:
     get:
       responses:
         '200':
@@ -3514,20 +3514,30 @@ components:
             If status of the job is failed, this will contain the error message.
         type:
           type: string
-          const: eval
-          default: eval
+          enum:
+            - batch_inference
+            - scoring
+            - evaluation
+            - post_training
+          default: evaluation
+          description: The type of the job.
         result_files:
           type: array
           items:
             type: string
+          description: The file ids of the eval results.
         result_datasets:
           type: array
           items:
             type: string
+          description: >-
+            The ids of the datasets containing the eval results.
         benchmark_id:
           type: string
+          description: The id of the benchmark to evaluate on.
         candidate:
           $ref: '#/components/schemas/EvalCandidate'
+          description: The candidate to evaluate on.
       additionalProperties: false
       required:
         - id
@@ -3539,6 +3549,7 @@ components:
         - benchmark_id
         - candidate
       title: EvalJob
+      description: An evaluation job.
     ModelCandidate:
       type: object
       properties:
@@ -3693,22 +3704,32 @@ components:
             If status of the job is failed, this will contain the error message.
         type:
           type: string
-          const: scoring
+          enum:
+            - batch_inference
+            - scoring
+            - evaluation
+            - post_training
           default: scoring
+          description: The type of the job.
         result_files:
           type: array
           items:
             type: string
+          description: The file ids of the scoring results.
         result_datasets:
           type: array
           items:
             type: string
+          description: >-
+            The ids of the datasets containing the scoring results.
         dataset_id:
           type: string
+          description: The id of the dataset used for scoring.
         scoring_fn_ids:
           type: array
           items:
             type: string
+          description: The ids of the scoring functions used.
       additionalProperties: false
       required:
         - id
@@ -3720,6 +3741,7 @@ components:
         - dataset_id
         - scoring_fn_ids
       title: ScoringJob
+      description: A scoring job.
     CancelTrainingJobRequest:
       type: object
       properties:
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index d98c9cb87..1d971ab81 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
 from typing_extensions import Annotated
 
 from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
+from llama_stack.apis.common.job_types import CommonJobFields, JobType
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
 
 @json_schema_type
 class EvalJob(CommonJobFields):
-    type: Literal["eval"] = "eval"
+    """
+    An evaluation job.
+
+    :param type: The type of the job.
+    :param result_files: The file ids of the eval results.
+    :param result_datasets: The ids of the datasets containing the eval results.
+    :param benchmark_id: The id of the benchmark to evaluate on.
+    :param candidate: The candidate to evaluate on.
+    """
+
+    type: JobType = JobType.evaluation.value
     result_files: List[str] = Field(
         description="The file ids of the eval results.",
         default_factory=list,
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index d54b34491..961598e35 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
 
 @json_schema_type
 class ScoringJob(CommonJobFields):
-    type: Literal["scoring"] = "scoring"
+    """
+    A scoring job.
+
+    :param type: The type of the job.
+    :param result_files: The file ids of the scoring results.
+    :param result_datasets: The ids of the datasets containing the scoring results.
+    :param dataset_id: The id of the dataset used for scoring.
+    :param scoring_fn_ids: The ids of the scoring functions used.
+    """
+
+    type: JobType = JobType.scoring.value
 
     result_files: List[str] = Field(
         description="The file ids of the scoring results.",