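Eval and scoring jobs: rename job endpoints to /jobs/{job_id}, use the shared JobType enum for the type field, and add schema descriptions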

This commit is contained in:
Xi Yan 2025-03-13 11:47:42 -07:00
parent 36320728bf
commit 775e8514b7
4 changed files with 92 additions and 28 deletions

View file

@ -230,7 +230,7 @@
} }
} }
}, },
"/v1/eval/job/{job_id}/cancel": { "/v1/eval/jobs/{job_id}/cancel": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -280,7 +280,7 @@
] ]
} }
}, },
"/v1/scoring/job/{job_id}/cancel": { "/v1/scoring/jobs/{job_id}/cancel": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -923,7 +923,7 @@
] ]
} }
}, },
"/v1/eval/job/{job_id}": { "/v1/eval/jobs/{job_id}": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -1123,7 +1123,7 @@
] ]
} }
}, },
"/v1/scoring/job/{job_id}": { "/v1/scoring/jobs/{job_id}": {
"get": { "get": {
"responses": { "responses": {
"200": { "200": {
@ -5160,26 +5160,36 @@
}, },
"type": { "type": {
"type": "string", "type": "string",
"const": "eval", "enum": [
"default": "eval" "batch_inference",
"scoring",
"evaluation",
"post_training"
],
"default": "evaluation",
"description": "The type of the job."
}, },
"result_files": { "result_files": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The file ids of the eval results."
}, },
"result_datasets": { "result_datasets": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The ids of the datasets containing the eval results."
}, },
"benchmark_id": { "benchmark_id": {
"type": "string" "type": "string",
"description": "The id of the benchmark to evaluate on."
}, },
"candidate": { "candidate": {
"$ref": "#/components/schemas/EvalCandidate" "$ref": "#/components/schemas/EvalCandidate",
"description": "The candidate to evaluate on."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -5193,7 +5203,8 @@
"benchmark_id", "benchmark_id",
"candidate" "candidate"
], ],
"title": "EvalJob" "title": "EvalJob",
"description": "An evaluation job."
}, },
"ModelCandidate": { "ModelCandidate": {
"type": "object", "type": "object",
@ -5399,29 +5410,39 @@
}, },
"type": { "type": {
"type": "string", "type": "string",
"const": "scoring", "enum": [
"default": "scoring" "batch_inference",
"scoring",
"evaluation",
"post_training"
],
"default": "scoring",
"description": "The type of the job."
}, },
"result_files": { "result_files": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The file ids of the scoring results."
}, },
"result_datasets": { "result_datasets": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The ids of the datasets containing the scoring results."
}, },
"dataset_id": { "dataset_id": {
"type": "string" "type": "string",
"description": "The id of the dataset used for scoring."
}, },
"scoring_fn_ids": { "scoring_fn_ids": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
} },
"description": "The ids of the scoring functions used."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -5435,7 +5456,8 @@
"dataset_id", "dataset_id",
"scoring_fn_ids" "scoring_fn_ids"
], ],
"title": "ScoringJob" "title": "ScoringJob",
"description": "A scoring job."
}, },
"CancelTrainingJobRequest": { "CancelTrainingJobRequest": {
"type": "object", "type": "object",

View file

@ -142,7 +142,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/BatchCompletionRequest' $ref: '#/components/schemas/BatchCompletionRequest'
required: true required: true
/v1/eval/job/{job_id}/cancel: /v1/eval/jobs/{job_id}/cancel:
post: post:
responses: responses:
'200': '200':
@ -173,7 +173,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/scoring/job/{job_id}/cancel: /v1/scoring/jobs/{job_id}/cancel:
post: post:
responses: responses:
'200': '200':
@ -622,7 +622,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/eval/job/{job_id}: /v1/eval/jobs/{job_id}:
get: get:
responses: responses:
'200': '200':
@ -756,7 +756,7 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/scoring/job/{job_id}: /v1/scoring/jobs/{job_id}:
get: get:
responses: responses:
'200': '200':
@ -3514,20 +3514,30 @@ components:
If status of the job is failed, this will contain the error message. If status of the job is failed, this will contain the error message.
type: type:
type: string type: string
const: eval enum:
default: eval - batch_inference
- scoring
- evaluation
- post_training
default: evaluation
description: The type of the job.
result_files: result_files:
type: array type: array
items: items:
type: string type: string
description: The file ids of the eval results.
result_datasets: result_datasets:
type: array type: array
items: items:
type: string type: string
description: >-
The ids of the datasets containing the eval results.
benchmark_id: benchmark_id:
type: string type: string
description: The id of the benchmark to evaluate on.
candidate: candidate:
$ref: '#/components/schemas/EvalCandidate' $ref: '#/components/schemas/EvalCandidate'
description: The candidate to evaluate on.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -3539,6 +3549,7 @@ components:
- benchmark_id - benchmark_id
- candidate - candidate
title: EvalJob title: EvalJob
description: An evaluation job.
ModelCandidate: ModelCandidate:
type: object type: object
properties: properties:
@ -3693,22 +3704,32 @@ components:
If status of the job is failed, this will contain the error message. If status of the job is failed, this will contain the error message.
type: type:
type: string type: string
const: scoring enum:
- batch_inference
- scoring
- evaluation
- post_training
default: scoring default: scoring
description: The type of the job.
result_files: result_files:
type: array type: array
items: items:
type: string type: string
description: The file ids of the scoring results.
result_datasets: result_datasets:
type: array type: array
items: items:
type: string type: string
description: >-
The ids of the datasets containing the scoring results.
dataset_id: dataset_id:
type: string type: string
description: The id of the dataset used for scoring.
scoring_fn_ids: scoring_fn_ids:
type: array type: array
items: items:
type: string type: string
description: The ids of the scoring functions used.
additionalProperties: false additionalProperties: false
required: required:
- id - id
@ -3720,6 +3741,7 @@ components:
- dataset_id - dataset_id
- scoring_fn_ids - scoring_fn_ids
title: ScoringJob title: ScoringJob
description: A scoring job.
CancelTrainingJobRequest: CancelTrainingJobRequest:
type: object type: object
properties: properties:

View file

@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated from typing_extensions import Annotated
from llama_stack.apis.agents import AgentConfig from llama_stack.apis.agents import AgentConfig
from llama_stack.apis.common.job_types import CommonJobFields, JobStatus from llama_stack.apis.common.job_types import CommonJobFields, JobType
from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult from llama_stack.apis.scoring import ScoringResult
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
@json_schema_type @json_schema_type
class EvalJob(CommonJobFields): class EvalJob(CommonJobFields):
type: Literal["eval"] = "eval" """
An evaluation job.
:param type: The type of the job.
:param result_files: The file ids of the eval results.
:param result_datasets: The ids of the datasets containing the eval results.
:param benchmark_id: The id of the benchmark to evaluate on.
:param candidate: The candidate to evaluate on.
"""
type: JobType = JobType.evaluation.value
result_files: List[str] = Field( result_files: List[str] = Field(
description="The file ids of the eval results.", description="The file ids of the eval results.",
default_factory=list, default_factory=list,

View file

@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
@json_schema_type @json_schema_type
class ScoringJob(CommonJobFields): class ScoringJob(CommonJobFields):
type: Literal["scoring"] = "scoring" """
A scoring job.
:param type: The type of the job.
:param result_files: The file ids of the scoring results.
:param result_datasets: The ids of the datasets containing the scoring results.
:param dataset_id: The id of the dataset used for scoring.
:param scoring_fn_ids: The ids of the scoring functions used.
"""
type: JobType = JobType.scoring.value
result_files: List[str] = Field( result_files: List[str] = Field(
description="The file ids of the scoring results.", description="The file ids of the scoring results.",