tmp

2025-12-31 07:59:59 +00:00 · 2025-03-12 00:39:49 -07:00 · 2025-03-12 00:39:49 -07:00 · 4acd1e404e
commit 4acd1e404e
parent 3e0d4901da
2 changed files with 12 additions and 7 deletions
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@@ -28,9 +28,11 @@ class CommonJobFields(BaseModel):
    :param status: The status of the job.
    :param created_at: The time the job was created.
    :param finished_at: The time the job finished.
+    :param error: If status of the job is failed, this will contain the error message.
    """

    id: str
    status: JobStatus
    created_at: datetime
    finished_at: Optional[datetime] = None
+    error: Optional[str] = None
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
 from typing_extensions import Annotated

 from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import Job, JobStatus
+from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
@@ -84,12 +84,15 @@ class EvaluateResponse(BaseModel):


@json_schema_type
-class EvalJob(Job):
-    """The EvalJob object representing a evaluation job that was created through API.
+class EvalJob(CommonJobFields):
+    """The EvalJob object representing an evaluation job that was created through API."""

-    :param job_id: The ID of the job.
-    :param status: The status of the job.
-    """
+    type: Literal["eval"] = "eval"
+    # TODO: result files or result datasets ids?
+    result_files: List[str] = Field(
+        default_factory=list,
+        description="Result files of an evaluation run. Which can be queried for results.",
+    )


 class Eval(Protocol):
@@ -99,7 +102,7 @@ class Eval(Protocol):
    async def evaluate_benchmark(
        self,
        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        candidate: EvalCandidate,
    ) -> EvalJob:
        """Run an evaluation on a benchmark.