wip

2026-01-03 08:42:15 +00:00 · 2025-03-12 00:09:03 -07:00 · 2025-03-12 00:09:03 -07:00 · 78b4cdad67
commit 78b4cdad67
parent 5c954dd033
2 changed files with 82 additions and 46 deletions
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@ -3,21 +3,34 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from datetime import datetime
 from enum import Enum
 from typing import Optional
 from pydantic import BaseModel
 from llama_stack.schema_utils import json_schema_type
@json_schema_type
 class Job(BaseModel):
    """Minimal job reference that carries only the job's identifier.

    :param job_id: The ID of the job.
    """
    job_id: str
@json_schema_type
 class JobStatus(Enum):
    """Enumeration of the statuses a job can have.

    Each member's value is the same lowercase string as the member's name.
    """
    completed = "completed"
    in_progress = "in_progress"
    failed = "failed"
    scheduled = "scheduled"
    cancelled = "cancelled"
@json_schema_type
 class CommonJobFields(BaseModel):
    """Common fields for all jobs.
    :param id: The ID of the job.
    :param status: The status of the job.
    :param created_at: The time the job was created.
    :param finished_at: The time the job finished. Defaults to None when not provided.
    """
    # NOTE(review): the identifier is named `id` here, whereas `Job` names it
    # `job_id` -- confirm the difference is intentional.
    id: str
    status: JobStatus
    created_at: datetime
    # Optional with a None default, so it may be omitted when constructing the model.
    finished_at: Optional[datetime] = None
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@ -83,15 +83,24 @@ class EvaluateResponse(BaseModel):
    scores: Dict[str, ScoringResult]
@json_schema_type
 class EvalJob(Job):
    """The EvalJob object representing an evaluation job that was created through the API.
    :param job_id: The ID of the job.
    :param status: The status of the job.
    """
    # NOTE(review): `status` is documented above, but no `status` field is
    # declared in this class body, and the `Job` base shown in this change
    # declares only `job_id` -- confirm whether the field is still to be added
    # or the docstring entry should be dropped.
 class Eval(Protocol):
    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
-    async def run_eval(
+    async def evaluate_benchmark(
        self,
        benchmark_id: str,
        benchmark_config: BenchmarkConfig,
-    ) -> Job:
+    ) -> EvalJob:
        """Run an evaluation on a benchmark.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
@ -99,47 +108,61 @@ class Eval(Protocol):
        :return: The job that was created to run the evaluation.
        """
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    # TODO: add these back in
-    async def evaluate_rows(
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
-        self,
+    # async def run_eval(
-        benchmark_id: str,
+    #     self,
-        input_rows: List[Dict[str, Any]],
+    #     benchmark_id: str,
-        scoring_functions: List[str],
+    #     benchmark_config: BenchmarkConfig,
-        benchmark_config: BenchmarkConfig,
+    # ) -> Job:
-    ) -> EvaluateResponse:
+    #     """Run an evaluation on a benchmark.
        """Evaluate a list of rows on a benchmark.
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param input_rows: The rows to evaluate.
+    #     :param benchmark_config: The configuration for the benchmark.
-        :param scoring_functions: The scoring functions to use for the evaluation.
+    #     :return: The job that was created to run the evaluation.
-        :param benchmark_config: The configuration for the benchmark.
+    #     """
        :return: EvaluateResponse object containing generations and scores
        """
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
-    async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
+    # async def evaluate_rows(
-        """Get the status of a job.
+    #     self,
    #     benchmark_id: str,
    #     input_rows: List[Dict[str, Any]],
    #     scoring_functions: List[str],
    #     benchmark_config: BenchmarkConfig,
    # ) -> EvaluateResponse:
    #     """Evaluate a list of rows on a benchmark.
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the status of.
+    #     :param input_rows: The rows to evaluate.
-        :return: The status of the evaluationjob.
+    #     :param scoring_functions: The scoring functions to use for the evaluation.
-        """
+    #     :param benchmark_config: The configuration for the benchmark.
-        ...
+    #     :return: EvaluateResponse object containing generations and scores
    #     """
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    # async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
-        """Cancel a job.
+    #     """Get the status of a job.
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to cancel.
+    #     :param job_id: The ID of the job to get the status of.
-        """
+    #     :return: The status of the evaluation job.
-        ...
+    #     """
    #     ...
-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    # async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
-        """Get the result of a job.
+    #     """Cancel a job.
-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the result of.
+    #     :param job_id: The ID of the job to cancel.
-        :return: The result of the job.
+    #     """
-        """
+    #     ...
    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
    # async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
    #     """Get the result of a job.
    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
    #     :param job_id: The ID of the job to get the result of.
    #     :return: The result of the job.
    #     """