forked from phoenix-oss/llama-stack-mirror

precommit

commit bf135f38b1
parent 205a50f10b

4 changed files with 11 additions and 11 deletions
docs/_static/llama-stack-spec.html (vendored): 2 changes
@@ -8548,7 +8548,7 @@
       },
       "additionalProperties": false,
       "title": "EvaluationTask",
-      "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
+      "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
     },
     "GradeRequest": {
       "type": "object",
docs/_static/llama-stack-spec.yaml (vendored): 2 changes
@@ -5927,7 +5927,7 @@ components:
       - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when
       you have a curated dataset and have settled on the graders. - `dataset_id`
       and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
-      Use this when you have datasets and / or are iterating on your graders. -
+      Use this when you have datasets and / or are iterating on your graders. -
       `data_source` and `grader_ids`: Run evaluation task against a data source
       (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are
       early in your evaluation cycle and experimenting much more with your data
@@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
     """
     A task for evaluation. To specify a task, one of the following must be provided:
     - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
-    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
+    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
     - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.

     :param benchmark_id: The benchmark ID to evaluate.
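For context, a minimal sketch of what the three documented ways of specifying an EvaluationTask could look like in practice. Only the field names (benchmark_id, dataset_id, data_source, grader_ids) come from the docstring above; the RowsDataSource stand-in, the mutual-exclusion validator, and the example IDs are illustrative assumptions, not code from this commit.

# Sketch of EvaluationTask usage per the docstring above; shapes and IDs are assumed.
from pydantic import BaseModel, model_validator


class RowsDataSource(BaseModel):  # assumed stand-in for the real data source types
    rows: list[dict]


class EvaluationTask(BaseModel):
    benchmark_id: str | None = None
    dataset_id: str | None = None
    data_source: RowsDataSource | None = None
    grader_ids: list[str] | None = None

    @model_validator(mode="after")
    def _check_exactly_one_way(self) -> "EvaluationTask":
        # Exactly one of the three documented combinations should be supplied.
        ways = [
            self.benchmark_id is not None,
            self.dataset_id is not None and self.grader_ids is not None,
            self.data_source is not None and self.grader_ids is not None,
        ]
        if sum(ways) != 1:
            raise ValueError(
                "provide benchmark_id, or dataset_id + grader_ids, "
                "or data_source + grader_ids"
            )
        return self


# The three documented ways to specify a task:
by_benchmark = EvaluationTask(benchmark_id="mmlu::v1")
by_dataset = EvaluationTask(dataset_id="my-eval-set", grader_ids=["factuality"])
by_rows = EvaluationTask(
    data_source=RowsDataSource(rows=[{"input": "2+2", "expected": "4"}]),
    grader_ids=["equality"],
)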
@@ -20,7 +20,7 @@ from typing import (
 from pydantic import BaseModel, Field

 from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.resource import Resource
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 from .graders import *  # noqa: F401 F403
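The dropped ResourceType import follows from the final hunk below, where Literal[ResourceType.grader.value] becomes Literal["grader"], leaving ResourceType unused in this module. The likely motivation for rewriting all of these tags as plain strings (an assumption; the commit message only says "precommit") is that static type checkers reject non-literal parameters to Literal, per PEP 586. A tiny illustration with a hypothetical enum:

# PEP 586: Literal[...] must be parameterized with explicit literal values.
from enum import Enum
from typing import Literal


class GraderType(Enum):  # hypothetical enum for illustration
    llm = "llm"


# Flagged by mypy/pyright: GraderType.llm.value is an attribute expression,
# not a literal, even though it evaluates to "llm" at runtime.
# BadTag = Literal[GraderType.llm.value]

# Accepted: the value is spelled out as a string literal.
GoodTag = Literal["llm"]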
@@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):

 @json_schema_type
 class LlmGrader(BaseModel):
-    type: Literal[GraderType.llm.value] = GraderType.llm.value
+    type: Literal["llm"] = "llm"
     llm: LlmGraderParams


 @json_schema_type
 class RegexParserGrader(BaseModel):
-    type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value
+    type: Literal["regex_parser"] = "regex_parser"
     regex_parser: RegexParserGraderParams


 @json_schema_type
 class EqualityGrader(BaseModel):
-    type: Literal[GraderType.equality.value] = GraderType.equality.value
+    type: Literal["equality"] = "equality"
     equality: BasicGraderParams


 @json_schema_type
 class SubsetOfGrader(BaseModel):
-    type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value
+    type: Literal["subset_of"] = "subset_of"
     subset_of: BasicGraderParams


 @json_schema_type
 class FactualityGrader(BaseModel):
-    type: Literal[GraderType.factuality.value] = GraderType.factuality.value
+    type: Literal["factuality"] = "factuality"
     factuality: BasicGraderParams


 @json_schema_type
 class FaithfulnessGrader(BaseModel):
-    type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value
+    type: Literal["faithfulness"] = "faithfulness"
     faithfulness: BasicGraderParams
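A minimal sketch of how these string-literal type tags can drive a Pydantic discriminated union, using the same class and field names as the hunk above; the params models are assumed stand-ins and the union itself is illustrative, not the registered schema from the codebase.

# Sketch: the "type" field acts as the discriminator, so validation
# dispatches on the tag. Params models are assumed shapes.
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class LlmGraderParams(BaseModel):  # assumed shape, not shown in this hunk
    model: str
    prompt: str


class RegexParserGraderParams(BaseModel):  # assumed shape, not shown in this hunk
    parsing_regexes: list[str]


class LlmGrader(BaseModel):
    type: Literal["llm"] = "llm"
    llm: LlmGraderParams


class RegexParserGrader(BaseModel):
    type: Literal["regex_parser"] = "regex_parser"
    regex_parser: RegexParserGraderParams


GraderDefinition = Annotated[
    Union[LlmGrader, RegexParserGrader],
    Field(discriminator="type"),
]

parsed = TypeAdapter(GraderDefinition).validate_python(
    {"type": "regex_parser", "regex_parser": {"parsing_regexes": [r"\d+"]}}
)
assert isinstance(parsed, RegexParserGrader)

Because GraderType.llm.value and "llm" are the same string, the runtime behavior and generated schema stay the same; only the static spelling of the tag changes.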
@@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):

 @json_schema_type
 class Grader(CommonGraderFields, Resource):
-    type: Literal[ResourceType.grader.value] = ResourceType.grader.value
+    type: Literal["grader"] = "grader"

     @property
     def grader_id(self) -> str:
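The same pattern applies to the Grader resource itself. A small sketch of how the grader_id property might relate to the underlying Resource fields; the Resource and CommonGraderFields stand-ins and the property body are assumptions (the real base classes live in llama_stack.apis.resource and are not shown in this diff).

# Sketch only: stand-in base classes; the real ones are not part of this diff.
from typing import Literal

from pydantic import BaseModel


class Resource(BaseModel):  # stand-in for llama_stack.apis.resource.Resource
    identifier: str
    provider_id: str


class CommonGraderFields(BaseModel):  # stand-in; real fields not shown here
    description: str | None = None


class Grader(CommonGraderFields, Resource):
    type: Literal["grader"] = "grader"

    @property
    def grader_id(self) -> str:
        # Assumed body: grader_id as a domain-specific alias for the generic identifier.
        return self.identifier


g = Grader(identifier="factuality-v1", provider_id="inline::basic")
print(g.grader_id)  # "factuality-v1"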