wip

2026-01-05 01:12:16 +00:00 · 2025-03-12 00:09:03 -07:00 · 2025-03-12 00:09:03 -07:00 · 78b4cdad67
commit 78b4cdad67
parent 5c954dd033
2 changed files with 82 additions and 46 deletions
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@@ -3,21 +3,34 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from datetime import datetime
 from enum import Enum
+from typing import Optional

 from pydantic import BaseModel

 from llama_stack.schema_utils import json_schema_type


-@json_schema_type
-class Job(BaseModel):
-    job_id: str
-
-
-@json_schema_type
 class JobStatus(Enum):
    completed = "completed"
    in_progress = "in_progress"
    failed = "failed"
    scheduled = "scheduled"
+    cancelled = "cancelled"
+
+
+@json_schema_type
+class CommonJobFields(BaseModel):
+    """Common fields for all jobs.
+
+    :param id: The ID of the job.
+    :param status: The status of the job.
+    :param created_at: The time the job was created.
+    :param finished_at: The time the job finished.
+    """
+
+    id: str
+    status: JobStatus
+    created_at: datetime
+    finished_at: Optional[datetime] = None
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -83,15 +83,24 @@ class EvaluateResponse(BaseModel):
    scores: Dict[str, ScoringResult]


+@json_schema_type
+class EvalJob(Job):
+    """The EvalJob object representing an evaluation job that was created through the API.
+
+    :param job_id: The ID of the job.
+    :param status: The status of the job.
+    """
+
+
 class Eval(Protocol):
    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
-    async def run_eval(
+    @webmethod(route="/eval/benchmark/{benchmark_id}/jobs", method="POST")
+    async def evaluate_benchmark(
        self,
        benchmark_id: str,
        benchmark_config: BenchmarkConfig,
-    ) -> Job:
+    ) -> EvalJob:
        """Run an evaluation on a benchmark.

        :param benchmark_id: The ID of the benchmark to run the evaluation on.
@@ -99,47 +108,61 @@ class Eval(Protocol):
        :return: The job that was created to run the evaluation.
        """

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
-    async def evaluate_rows(
-        self,
-        benchmark_id: str,
-        input_rows: List[Dict[str, Any]],
-        scoring_functions: List[str],
-        benchmark_config: BenchmarkConfig,
-    ) -> EvaluateResponse:
-        """Evaluate a list of rows on a benchmark.
+    # TODO: add these back in
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+    # async def run_eval(
+    #     self,
+    #     benchmark_id: str,
+    #     benchmark_config: BenchmarkConfig,
+    # ) -> Job:
+    #     """Run an evaluation on a benchmark.

-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param input_rows: The rows to evaluate.
-        :param scoring_functions: The scoring functions to use for the evaluation.
-        :param benchmark_config: The configuration for the benchmark.
-        :return: EvaluateResponse object containing generations and scores
-        """
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param benchmark_config: The configuration for the benchmark.
+    #     :return: The job that was created to run the evaluation.
+    #     """

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
-    async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
-        """Get the status of a job.
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
+    # async def evaluate_rows(
+    #     self,
+    #     benchmark_id: str,
+    #     input_rows: List[Dict[str, Any]],
+    #     scoring_functions: List[str],
+    #     benchmark_config: BenchmarkConfig,
+    # ) -> EvaluateResponse:
+    #     """Evaluate a list of rows on a benchmark.

-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the status of.
-        :return: The status of the evaluationjob.
-        """
-        ...
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param input_rows: The rows to evaluate.
+    #     :param scoring_functions: The scoring functions to use for the evaluation.
+    #     :param benchmark_config: The configuration for the benchmark.
+    #     :return: EvaluateResponse object containing generations and scores
+    #     """

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
-        """Cancel a job.
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
+    # async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
+    #     """Get the status of a job.

-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to cancel.
-        """
-        ...
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param job_id: The ID of the job to get the status of.
+    #     :return: The status of the evaluation job.
+    #     """
+    #     ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
-        """Get the result of a job.
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
+    # async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    #     """Cancel a job.

-        :param benchmark_id: The ID of the benchmark to run the evaluation on.
-        :param job_id: The ID of the job to get the result of.
-        :return: The result of the job.
-        """
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param job_id: The ID of the job to cancel.
+    #     """
+    #     ...
+
+    # @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
+    # async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    #     """Get the result of a job.
+
+    #     :param benchmark_id: The ID of the benchmark to run the evaluation on.
+    #     :param job_id: The ID of the job to get the result of.
+    #     :return: The result of the job.
+    #     """