This commit is contained in:
Xi Yan 2025-03-12 00:09:03 -07:00
parent 5c954dd033
commit 78b4cdad67
2 changed files with 82 additions and 46 deletions

View file

@ -3,21 +3,34 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from datetime import datetime
from enum import Enum from enum import Enum
from typing import Optional
from pydantic import BaseModel from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type from llama_stack.schema_utils import json_schema_type
@json_schema_type
class Job(BaseModel):
    """Minimal handle for a job, carrying only its identifier.

    :param job_id: The ID of the job.
    """

    job_id: str
@json_schema_type
class JobStatus(Enum):
    """Status values a job can report.

    Each member's value is the member's own name as a string.
    """

    completed = "completed"
    in_progress = "in_progress"
    failed = "failed"
    scheduled = "scheduled"
    cancelled = "cancelled"
@json_schema_type
class CommonJobFields(BaseModel):
    """Fields shared by every job record.

    :param id: The ID of the job.
    :param status: The status of the job.
    :param created_at: The time the job was created.
    :param finished_at: The time the job finished; defaults to None when not set.
    """

    id: str
    status: JobStatus
    created_at: datetime
    finished_at: Optional[datetime] = None

View file

@ -83,15 +83,24 @@ class EvaluateResponse(BaseModel):
scores: Dict[str, ScoringResult] scores: Dict[str, ScoringResult]
# NOTE(review): the docstring lists `status`, but the Job base class visible in
# this change declares only `job_id` -- confirm where `status` is defined.
@json_schema_type
class EvalJob(Job):
    """An evaluation job that was created through the API.

    :param job_id: The ID of the job.
    :param status: The status of the job.
    """
class Eval(Protocol): class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates.""" """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST") @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
async def run_eval( async def evaluate_benchmark(
self, self,
benchmark_id: str, benchmark_id: str,
benchmark_config: BenchmarkConfig, benchmark_config: BenchmarkConfig,
) -> Job: ) -> EvalJob:
"""Run an evaluation on a benchmark. """Run an evaluation on a benchmark.
:param benchmark_id: The ID of the benchmark to run the evaluation on. :param benchmark_id: The ID of the benchmark to run the evaluation on.
@ -99,47 +108,61 @@ class Eval(Protocol):
:return: The job that was created to run the evaluation. :return: The job that was created to run the evaluation.
""" """
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") # TODO: add these back in
async def evaluate_rows( # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
self, # async def run_eval(
benchmark_id: str, # self,
input_rows: List[Dict[str, Any]], # benchmark_id: str,
scoring_functions: List[str], # benchmark_config: BenchmarkConfig,
benchmark_config: BenchmarkConfig, # ) -> Job:
) -> EvaluateResponse: # """Run an evaluation on a benchmark.
"""Evaluate a list of rows on a benchmark.
:param benchmark_id: The ID of the benchmark to run the evaluation on. # :param benchmark_id: The ID of the benchmark to run the evaluation on.
:param input_rows: The rows to evaluate. # :param benchmark_config: The configuration for the benchmark.
:param scoring_functions: The scoring functions to use for the evaluation. # :return: The job that was created to run the evaluation.
:param benchmark_config: The configuration for the benchmark. # """
:return: EvaluateResponse object containing generations and scores
"""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") # @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: # async def evaluate_rows(
"""Get the status of a job. # self,
# benchmark_id: str,
# input_rows: List[Dict[str, Any]],
# scoring_functions: List[str],
# benchmark_config: BenchmarkConfig,
# ) -> EvaluateResponse:
# """Evaluate a list of rows on a benchmark.
:param benchmark_id: The ID of the benchmark to run the evaluation on. # :param benchmark_id: The ID of the benchmark to run the evaluation on.
:param job_id: The ID of the job to get the status of. # :param input_rows: The rows to evaluate.
:return: The status of the evaluation job. # :param scoring_functions: The scoring functions to use for the evaluation.
""" # :param benchmark_config: The configuration for the benchmark.
... # :return: EvaluateResponse object containing generations and scores
# """
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE") # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
async def job_cancel(self, benchmark_id: str, job_id: str) -> None: # async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
"""Cancel a job. # """Get the status of a job.
:param benchmark_id: The ID of the benchmark to run the evaluation on. # :param benchmark_id: The ID of the benchmark to run the evaluation on.
:param job_id: The ID of the job to cancel. # :param job_id: The ID of the job to get the status of.
""" # :return: The status of the evaluation job.
... # """
# ...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: # async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Get the result of a job. # """Cancel a job.
:param benchmark_id: The ID of the benchmark to run the evaluation on. # :param benchmark_id: The ID of the benchmark to run the evaluation on.
:param job_id: The ID of the job to get the result of. # :param job_id: The ID of the job to cancel.
:return: The result of the job. # """
""" # ...
# @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
# async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
# """Get the result of a job.
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
# :param job_id: The ID of the job to get the result of.
# :return: The result of the job.
# """