mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-10 04:08:31 +00:00
wip
This commit is contained in:
parent
5c954dd033
commit
78b4cdad67
2 changed files with 82 additions and 46 deletions
|
@ -3,21 +3,34 @@
|
||||||
#
|
#
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
from datetime import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_stack.schema_utils import json_schema_type
|
from llama_stack.schema_utils import json_schema_type
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class Job(BaseModel):
|
|
||||||
job_id: str
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
|
||||||
class JobStatus(Enum):
|
class JobStatus(Enum):
|
||||||
completed = "completed"
|
completed = "completed"
|
||||||
in_progress = "in_progress"
|
in_progress = "in_progress"
|
||||||
failed = "failed"
|
failed = "failed"
|
||||||
scheduled = "scheduled"
|
scheduled = "scheduled"
|
||||||
|
cancelled = "cancelled"
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class CommonJobFields(BaseModel):
|
||||||
|
"""Common fields for all jobs.
|
||||||
|
|
||||||
|
:param id: The ID of the job.
|
||||||
|
:param status: The status of the job.
|
||||||
|
:param created_at: The time the job was created.
|
||||||
|
:param finished_at: The time the job finished.
|
||||||
|
"""
|
||||||
|
|
||||||
|
id: str
|
||||||
|
status: JobStatus
|
||||||
|
created_at: datetime
|
||||||
|
finished_at: Optional[datetime] = None
|
||||||
|
|
|
@ -83,15 +83,24 @@ class EvaluateResponse(BaseModel):
|
||||||
scores: Dict[str, ScoringResult]
|
scores: Dict[str, ScoringResult]
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class EvalJob(Job):
|
||||||
|
"""The EvalJob object representing an evaluation job that was created through the API.
|
||||||
|
|
||||||
|
:param job_id: The ID of the job.
|
||||||
|
:param status: The status of the job.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Eval(Protocol):
|
class Eval(Protocol):
|
||||||
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
|
@webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
|
||||||
async def run_eval(
|
async def evaluate_benchmark(
|
||||||
self,
|
self,
|
||||||
benchmark_id: str,
|
benchmark_id: str,
|
||||||
benchmark_config: BenchmarkConfig,
|
benchmark_config: BenchmarkConfig,
|
||||||
) -> Job:
|
) -> EvalJob:
|
||||||
"""Run an evaluation on a benchmark.
|
"""Run an evaluation on a benchmark.
|
||||||
|
|
||||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
@ -99,47 +108,61 @@ class Eval(Protocol):
|
||||||
:return: The job that was created to run the evaluation.
|
:return: The job that was created to run the evaluation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
|
# TODO: add these back in
|
||||||
async def evaluate_rows(
|
# @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
|
||||||
self,
|
# async def run_eval(
|
||||||
benchmark_id: str,
|
# self,
|
||||||
input_rows: List[Dict[str, Any]],
|
# benchmark_id: str,
|
||||||
scoring_functions: List[str],
|
# benchmark_config: BenchmarkConfig,
|
||||||
benchmark_config: BenchmarkConfig,
|
# ) -> Job:
|
||||||
) -> EvaluateResponse:
|
# """Run an evaluation on a benchmark.
|
||||||
"""Evaluate a list of rows on a benchmark.
|
|
||||||
|
|
||||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
:param input_rows: The rows to evaluate.
|
# :param benchmark_config: The configuration for the benchmark.
|
||||||
:param scoring_functions: The scoring functions to use for the evaluation.
|
# :return: The job that was created to run the evaluation.
|
||||||
:param benchmark_config: The configuration for the benchmark.
|
# """
|
||||||
:return: EvaluateResponse object containing generations and scores
|
|
||||||
"""
|
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
# @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
|
||||||
async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
|
# async def evaluate_rows(
|
||||||
"""Get the status of a job.
|
# self,
|
||||||
|
# benchmark_id: str,
|
||||||
|
# input_rows: List[Dict[str, Any]],
|
||||||
|
# scoring_functions: List[str],
|
||||||
|
# benchmark_config: BenchmarkConfig,
|
||||||
|
# ) -> EvaluateResponse:
|
||||||
|
# """Evaluate a list of rows on a benchmark.
|
||||||
|
|
||||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
:param job_id: The ID of the job to get the status of.
|
# :param input_rows: The rows to evaluate.
|
||||||
:return: The status of the evaluation job.
|
# :param scoring_functions: The scoring functions to use for the evaluation.
|
||||||
"""
|
# :param benchmark_config: The configuration for the benchmark.
|
||||||
...
|
# :return: EvaluateResponse object containing generations and scores
|
||||||
|
# """
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
# @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
|
||||||
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
# async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
|
||||||
"""Cancel a job.
|
# """Get the status of a job.
|
||||||
|
|
||||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
:param job_id: The ID of the job to cancel.
|
# :param job_id: The ID of the job to get the status of.
|
||||||
"""
|
# :return: The status of the evaluation job.
|
||||||
...
|
# """
|
||||||
|
# ...
|
||||||
|
|
||||||
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
# @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
|
||||||
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
# async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
|
||||||
"""Get the result of a job.
|
# """Cancel a job.
|
||||||
|
|
||||||
:param benchmark_id: The ID of the benchmark to run the evaluation on.
|
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
:param job_id: The ID of the job to get the result of.
|
# :param job_id: The ID of the job to cancel.
|
||||||
:return: The result of the job.
|
# """
|
||||||
"""
|
# ...
|
||||||
|
|
||||||
|
# @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
|
||||||
|
# async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
|
||||||
|
# """Get the result of a job.
|
||||||
|
|
||||||
|
# :param benchmark_id: The ID of the benchmark to run the evaluation on.
|
||||||
|
# :param job_id: The ID of the job to get the result of.
|
||||||
|
# :return: The result of the job.
|
||||||
|
# """
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue