jobs eval scoring

2025-12-31 03:53:51 +00:00 · 2025-03-13 11:47:42 -07:00 · 2025-03-13 11:47:42 -07:00 · 775e8514b7
commit 775e8514b7
parent 36320728bf
4 changed files with 92 additions and 28 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,7 +230,7 @@
                }
            }
        },
-        "/v1/eval/job/{job_id}/cancel": {
+        "/v1/eval/jobs/{job_id}/cancel": {
            "post": {
                "responses": {
                    "200": {
@@ -280,7 +280,7 @@
                ]
            }
        },
-        "/v1/scoring/job/{job_id}/cancel": {
+        "/v1/scoring/jobs/{job_id}/cancel": {
            "post": {
                "responses": {
                    "200": {
@@ -923,7 +923,7 @@
                ]
            }
        },
-        "/v1/eval/job/{job_id}": {
+        "/v1/eval/jobs/{job_id}": {
            "get": {
                "responses": {
                    "200": {
@@ -1123,7 +1123,7 @@
                ]
            }
        },
-        "/v1/scoring/job/{job_id}": {
+        "/v1/scoring/jobs/{job_id}": {
            "get": {
                "responses": {
                    "200": {
@@ -5160,26 +5160,36 @@
                    },
                    "type": {
                        "type": "string",
-                        "const": "eval",
+                        "enum": [
-                        "default": "eval"
+                            "batch_inference",
+                            "scoring",
+                            "evaluation",
+                            "post_training"
+                        ],
+                        "default": "evaluation",
+                        "description": "The type of the job."
                    },
                    "result_files": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The file ids of the eval results."
                    },
                    "result_datasets": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The ids of the datasets containing the eval results."
                    },
                    "benchmark_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The id of the benchmark to evaluate on."
                    },
                    "candidate": {
-                        "$ref": "#/components/schemas/EvalCandidate"
+                        "$ref": "#/components/schemas/EvalCandidate",
+                        "description": "The candidate to evaluate on."
                    }
                },
                "additionalProperties": false,
@@ -5193,7 +5203,8 @@
                    "benchmark_id",
                    "candidate"
                ],
-                "title": "EvalJob"
+                "title": "EvalJob",
+                "description": "An evaluation job."
            },
            "ModelCandidate": {
                "type": "object",
@@ -5399,29 +5410,39 @@
                    },
                    "type": {
                        "type": "string",
-                        "const": "scoring",
+                        "enum": [
-                        "default": "scoring"
+                            "batch_inference",
+                            "scoring",
+                            "evaluation",
+                            "post_training"
+                        ],
+                        "default": "scoring",
+                        "description": "The type of the job."
                    },
                    "result_files": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The file ids of the scoring results."
                    },
                    "result_datasets": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The ids of the datasets containing the scoring results."
                    },
                    "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The id of the dataset used for scoring."
                    },
                    "scoring_fn_ids": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
+                        "description": "The ids of the scoring functions used."
                    }
                },
                "additionalProperties": false,
@@ -5435,7 +5456,8 @@
                    "dataset_id",
                    "scoring_fn_ids"
                ],
-                "title": "ScoringJob"
+                "title": "ScoringJob",
+                "description": "A scoring job."
            },
            "CancelTrainingJobRequest": {
                "type": "object",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,7 +142,7 @@ paths:
            schema:
              $ref: '#/components/schemas/BatchCompletionRequest'
        required: true
-  /v1/eval/job/{job_id}/cancel:
+  /v1/eval/jobs/{job_id}/cancel:
    post:
      responses:
        '200':
@@ -173,7 +173,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/scoring/job/{job_id}/cancel:
+  /v1/scoring/jobs/{job_id}/cancel:
    post:
      responses:
        '200':
@@ -622,7 +622,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/eval/job/{job_id}:
+  /v1/eval/jobs/{job_id}:
    get:
      responses:
        '200':
@@ -756,7 +756,7 @@ paths:
          required: true
          schema:
            type: string
-  /v1/scoring/job/{job_id}:
+  /v1/scoring/jobs/{job_id}:
    get:
      responses:
        '200':
@@ -3514,20 +3514,30 @@ components:
            If status of the job is failed, this will contain the error message.
        type:
          type: string
-          const: eval
+          enum:
-          default: eval
+            - batch_inference
+            - scoring
+            - evaluation
+            - post_training
+          default: evaluation
+          description: The type of the job.
        result_files:
          type: array
          items:
            type: string
+          description: The file ids of the eval results.
        result_datasets:
          type: array
          items:
            type: string
+          description: >-
+            The ids of the datasets containing the eval results.
        benchmark_id:
          type: string
+          description: The id of the benchmark to evaluate on.
        candidate:
          $ref: '#/components/schemas/EvalCandidate'
+          description: The candidate to evaluate on.
      additionalProperties: false
      required:
        - id
@@ -3539,6 +3549,7 @@ components:
        - benchmark_id
        - candidate
      title: EvalJob
+      description: An evaluation job.
    ModelCandidate:
      type: object
      properties:
@@ -3693,22 +3704,32 @@ components:
            If status of the job is failed, this will contain the error message.
        type:
          type: string
-          const: scoring
+          enum:
+            - batch_inference
+            - scoring
+            - evaluation
+            - post_training
          default: scoring
+          description: The type of the job.
        result_files:
          type: array
          items:
            type: string
+          description: The file ids of the scoring results.
        result_datasets:
          type: array
          items:
            type: string
+          description: >-
+            The ids of the datasets containing the scoring results.
        dataset_id:
          type: string
+          description: The id of the dataset used for scoring.
        scoring_fn_ids:
          type: array
          items:
            type: string
+          description: The ids of the scoring functions used.
      additionalProperties: false
      required:
        - id
@@ -3720,6 +3741,7 @@ components:
        - dataset_id
        - scoring_fn_ids
      title: ScoringJob
+      description: A scoring job.
    CancelTrainingJobRequest:
      type: object
      properties:
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
 from typing_extensions import Annotated
 from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
+from llama_stack.apis.common.job_types import CommonJobFields, JobType
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
 @json_schema_type
 class EvalJob(CommonJobFields):
-    type: Literal["eval"] = "eval"
+    """
+    An evaluation job.
+    :param type: The type of the job.
+    :param result_files: The file ids of the eval results.
+    :param result_datasets: The ids of the datasets containing the eval results.
+    :param benchmark_id: The id of the benchmark to evaluate on.
+    :param candidate: The candidate to evaluate on.
+    """
+    type: JobType = JobType.evaluation.value
    result_files: List[str] = Field(
        description="The file ids of the eval results.",
        default_factory=list,
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
 @json_schema_type
 class ScoringJob(CommonJobFields):
-    type: Literal["scoring"] = "scoring"
+    """
+    A scoring job.
+    :param type: The type of the job.
+    :param result_files: The file ids of the scoring results.
+    :param result_datasets: The ids of the datasets containing the scoring results.
+    :param dataset_id: The id of the dataset used for scoring.
+    :param scoring_fn_ids: The ids of the scoring functions used.
+    """
+    type: JobType = JobType.scoring.value
    result_files: List[str] = Field(
        description="The file ids of the scoring results.",