From 775e8514b70036788b17f4c808f6ac90ae1ee6b7 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 13 Mar 2025 11:47:42 -0700 Subject: [PATCH] jobs eval scoring --- docs/_static/llama-stack-spec.html | 58 ++++++++++++++++++++--------- docs/_static/llama-stack-spec.yaml | 36 ++++++++++++++---- llama_stack/apis/eval/eval.py | 14 ++++++- llama_stack/apis/scoring/scoring.py | 12 +++++- 4 files changed, 92 insertions(+), 28 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 48a433495..58d3c918a 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -230,7 +230,7 @@ } } }, - "/v1/eval/job/{job_id}/cancel": { + "/v1/eval/jobs/{job_id}/cancel": { "post": { "responses": { "200": { @@ -280,7 +280,7 @@ ] } }, - "/v1/scoring/job/{job_id}/cancel": { + "/v1/scoring/jobs/{job_id}/cancel": { "post": { "responses": { "200": { @@ -923,7 +923,7 @@ ] } }, - "/v1/eval/job/{job_id}": { + "/v1/eval/jobs/{job_id}": { "get": { "responses": { "200": { @@ -1123,7 +1123,7 @@ ] } }, - "/v1/scoring/job/{job_id}": { + "/v1/scoring/jobs/{job_id}": { "get": { "responses": { "200": { @@ -5160,26 +5160,36 @@ }, "type": { "type": "string", - "const": "eval", - "default": "eval" + "enum": [ + "batch_inference", + "scoring", + "evaluation", + "post_training" + ], + "default": "evaluation", + "description": "The type of the job." }, "result_files": { "type": "array", "items": { "type": "string" - } + }, + "description": "The file ids of the eval results." }, "result_datasets": { "type": "array", "items": { "type": "string" - } + }, + "description": "The ids of the datasets containing the eval results." }, "benchmark_id": { - "type": "string" + "type": "string", + "description": "The id of the benchmark to evaluate on." }, "candidate": { - "$ref": "#/components/schemas/EvalCandidate" + "$ref": "#/components/schemas/EvalCandidate", + "description": "The candidate to evaluate on." } }, "additionalProperties": false, @@ -5193,7 +5203,8 @@ "benchmark_id", "candidate" ], - "title": "EvalJob" + "title": "EvalJob", + "description": "An evaluation job." }, "ModelCandidate": { "type": "object", @@ -5399,29 +5410,39 @@ }, "type": { "type": "string", - "const": "scoring", - "default": "scoring" + "enum": [ + "batch_inference", + "scoring", + "evaluation", + "post_training" + ], + "default": "scoring", + "description": "The type of the job." }, "result_files": { "type": "array", "items": { "type": "string" - } + }, + "description": "The file ids of the scoring results." }, "result_datasets": { "type": "array", "items": { "type": "string" - } + }, + "description": "The ids of the datasets containing the scoring results." }, "dataset_id": { - "type": "string" + "type": "string", + "description": "The id of the dataset used for scoring." }, "scoring_fn_ids": { "type": "array", "items": { "type": "string" - } + }, + "description": "The ids of the scoring functions used." } }, "additionalProperties": false, @@ -5435,7 +5456,8 @@ "dataset_id", "scoring_fn_ids" ], - "title": "ScoringJob" + "title": "ScoringJob", + "description": "A scoring job." }, "CancelTrainingJobRequest": { "type": "object", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 45058fbdc..8220cf5e7 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -142,7 +142,7 @@ paths: schema: $ref: '#/components/schemas/BatchCompletionRequest' required: true - /v1/eval/job/{job_id}/cancel: + /v1/eval/jobs/{job_id}/cancel: post: responses: '200': @@ -173,7 +173,7 @@ paths: required: true schema: type: string - /v1/scoring/job/{job_id}/cancel: + /v1/scoring/jobs/{job_id}/cancel: post: responses: '200': @@ -622,7 +622,7 @@ paths: required: true schema: type: string - /v1/eval/job/{job_id}: + /v1/eval/jobs/{job_id}: get: responses: '200': @@ -756,7 +756,7 @@ paths: required: true schema: type: string - /v1/scoring/job/{job_id}: + /v1/scoring/jobs/{job_id}: get: responses: '200': @@ -3514,20 +3514,30 @@ components: If status of the job is failed, this will contain the error message. type: type: string - const: eval - default: eval + enum: + - batch_inference + - scoring + - evaluation + - post_training + default: evaluation + description: The type of the job. result_files: type: array items: type: string + description: The file ids of the eval results. result_datasets: type: array items: type: string + description: >- + The ids of the datasets containing the eval results. benchmark_id: type: string + description: The id of the benchmark to evaluate on. candidate: $ref: '#/components/schemas/EvalCandidate' + description: The candidate to evaluate on. additionalProperties: false required: - id @@ -3539,6 +3549,7 @@ components: - benchmark_id - candidate title: EvalJob + description: An evaluation job. ModelCandidate: type: object properties: @@ -3693,22 +3704,32 @@ components: If status of the job is failed, this will contain the error message. type: type: string - const: scoring + enum: + - batch_inference + - scoring + - evaluation + - post_training default: scoring + description: The type of the job. result_files: type: array items: type: string + description: The file ids of the scoring results. result_datasets: type: array items: type: string + description: >- + The ids of the datasets containing the scoring results. dataset_id: type: string + description: The id of the dataset used for scoring. scoring_fn_ids: type: array items: type: string + description: The ids of the scoring functions used. additionalProperties: false required: - id @@ -3720,6 +3741,7 @@ components: - dataset_id - scoring_fn_ids title: ScoringJob + description: A scoring job. CancelTrainingJobRequest: type: object properties: diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index d98c9cb87..1d971ab81 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated from llama_stack.apis.agents import AgentConfig -from llama_stack.apis.common.job_types import CommonJobFields, JobStatus +from llama_stack.apis.common.job_types import CommonJobFields, JobType from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.scoring import ScoringResult from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel): @json_schema_type class EvalJob(CommonJobFields): - type: Literal["eval"] = "eval" + """ + An evaluation job. + + :param type: The type of the job. + :param result_files: The file ids of the eval results. + :param result_datasets: The ids of the datasets containing the eval results. + :param benchmark_id: The id of the benchmark to evaluate on. + :param candidate: The candidate to evaluate on. + """ + + type: JobType = JobType.evaluation.value result_files: List[str] = Field( description="The file ids of the eval results.", default_factory=list, diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index d54b34491..961598e35 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -50,7 +50,17 @@ class ScoreResponse(BaseModel): @json_schema_type class ScoringJob(CommonJobFields): - type: Literal["scoring"] = "scoring" + """ + A scoring job. + + :param type: The type of the job. + :param result_files: The file ids of the scoring results. + :param result_datasets: The ids of the datasets containing the scoring results. + :param dataset_id: The id of the dataset used for scoring. + :param scoring_fn_ids: The ids of the scoring functions used. + """ + + type: JobType = JobType.scoring.value result_files: List[str] = Field( description="The file ids of the scoring results.",