jobs eval scoring

This commit is contained in:
Xi Yan 2025-03-13 11:47:42 -07:00
parent 36320728bf
commit 775e8514b7
4 changed files with 92 additions and 28 deletions

View file

@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.apis.agents import AgentConfig
from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
from llama_stack.apis.common.job_types import CommonJobFields, JobType
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
@json_schema_type
class EvalJob(CommonJobFields):
type: Literal["eval"] = "eval"
"""
An evaluation job.
:param type: The type of the job.
:param result_files: The file ids of the eval results.
:param result_datasets: The ids of the datasets containing the eval results.
:param benchmark_id: The id of the benchmark to evaluate on.
:param candidate: The candidate to evaluate on.
"""
type: JobType = JobType.evaluation.value
result_files: List[str] = Field(
description="The file ids of the eval results.",
default_factory=list,

View file

@@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
@json_schema_type
class ScoringJob(CommonJobFields):
type: Literal["scoring"] = "scoring"
"""
A scoring job.
:param type: The type of the job.
:param result_files: The file ids of the scoring results.
:param result_datasets: The ids of the datasets containing the scoring results.
:param dataset_id: The id of the dataset used for scoring.
:param scoring_fn_ids: The ids of the scoring functions used.
"""
type: JobType = JobType.scoring.value
result_files: List[str] = Field(
description="The file ids of the scoring results.",