diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 09d4cb805..0f223b51b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8548,7 +8548,7 @@
},
"additionalProperties": false,
"title": "EvaluationTask",
- "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
+ "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
},
"GradeRequest": {
"type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 72361c50e..7c4ea81b8 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5927,7 +5927,7 @@ components:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when
you have a curated dataset and have settled on the graders. - `dataset_id`
and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
- Use this when you have datasets and / or are iterating on your graders. -
+ Use this when you have datasets and / or are iterating on your graders. -
`data_source` and `grader_ids`: Run evaluation task against a data source
(e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are
early in your evaluation cycle and experimenting much more with your data
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index e1f02dbae..269004b26 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
"""
A task for evaluation. To specify a task, one of the following must be provided:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
- - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
+ - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.
:param benchmark_id: The benchmark ID to evaluate.
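
The EvaluationTask docstring above (and its generated copies in the HTML/YAML spec) describes three mutually exclusive ways to specify a task, which are easiest to see at the call site. A minimal sketch, assuming the field names given in the docstring; the IDs and the shape of the data_source payload are hypothetical, chosen only for illustration.

    # Sketch of the three ways to specify an EvaluationTask, per the docstring.
    # The IDs and the data_source payload shape below are hypothetical.
    from llama_stack.apis.evaluation.evaluation import EvaluationTask

    # Mode 1: curated dataset with settled graders, referenced by benchmark.
    task = EvaluationTask(benchmark_id="my-benchmark")

    # Mode 2: iterating on graders against a registered dataset.
    task = EvaluationTask(
        dataset_id="my-dataset",
        grader_ids=["grader-a", "grader-b"],
    )

    # Mode 3: early experimentation against an ad-hoc data source (e.g. rows).
    task = EvaluationTask(
        data_source={"type": "rows", "rows": [{"input": "...", "expected": "..."}]},
        grader_ids=["grader-a"],
    )
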
diff --git a/llama_stack/apis/graders/graders.py b/llama_stack/apis/graders/graders.py
index 23c870e27..31e03b6d1 100644
--- a/llama_stack/apis/graders/graders.py
+++ b/llama_stack/apis/graders/graders.py
@@ -20,7 +20,7 @@ from typing import (
from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.resource import Resource
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .graders import * # noqa: F401 F403
@@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):
@json_schema_type
class LlmGrader(BaseModel):
- type: Literal[GraderType.llm.value] = GraderType.llm.value
+ type: Literal["llm"] = "llm"
llm: LlmGraderParams
@json_schema_type
class RegexParserGrader(BaseModel):
- type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value
+ type: Literal["regex_parser"] = "regex_parser"
regex_parser: RegexParserGraderParams
@json_schema_type
class EqualityGrader(BaseModel):
- type: Literal[GraderType.equality.value] = GraderType.equality.value
+ type: Literal["equality"] = "equality"
equality: BasicGraderParams
@json_schema_type
class SubsetOfGrader(BaseModel):
- type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value
+ type: Literal["subset_of"] = "subset_of"
subset_of: BasicGraderParams
@json_schema_type
class FactualityGrader(BaseModel):
- type: Literal[GraderType.factuality.value] = GraderType.factuality.value
+ type: Literal["factuality"] = "factuality"
factuality: BasicGraderParams
@json_schema_type
class FaithfulnessGrader(BaseModel):
- type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value
+ type: Literal["faithfulness"] = "faithfulness"
faithfulness: BasicGraderParams
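
A plausible motivation for the Literal changes in this hunk: static type checkers reject Literal[GraderType.llm.value], since the parameters of Literal[...] must be literal expressions rather than attribute lookups, while inlined strings type-check cleanly and serve equally well as Pydantic discriminators. A minimal self-contained sketch of the pattern, assuming Pydantic v2; GraderDefinition and Wrapper are illustrative names, not the library's actual union definition.

    # Plain string Literals still drive discriminated-union dispatch exactly
    # as the enum-valued form did; only the static typing story changes.
    from typing import Annotated, Literal, Union

    from pydantic import BaseModel, Field

    class LlmGrader(BaseModel):
        type: Literal["llm"] = "llm"

    class RegexParserGrader(BaseModel):
        type: Literal["regex_parser"] = "regex_parser"

    GraderDefinition = Annotated[
        Union[LlmGrader, RegexParserGrader],
        Field(discriminator="type"),
    ]

    class Wrapper(BaseModel):
        grader: GraderDefinition

    # The "type" field selects the concrete variant at validation time.
    w = Wrapper.model_validate({"grader": {"type": "regex_parser"}})
    assert isinstance(w.grader, RegexParserGrader)
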
@@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):
@json_schema_type
class Grader(CommonGraderFields, Resource):
- type: Literal[ResourceType.grader.value] = ResourceType.grader.value
+ type: Literal["grader"] = "grader"
@property
def grader_id(self) -> str:
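
One trade-off of inlining "grader" here (and dropping the ResourceType import above): the string can now silently drift from the enum it replaced. A hypothetical guard, not part of this diff, that would pin the inlined literals back to their enums:

    # Hypothetical regression test; ResourceType and GraderType come from the
    # modules this diff touches, but this test is an illustration only.
    from llama_stack.apis.graders.graders import GraderType
    from llama_stack.apis.resource import ResourceType

    def test_inlined_literals_match_enums() -> None:
        assert ResourceType.grader.value == "grader"
        assert GraderType.llm.value == "llm"
        assert GraderType.regex_parser.value == "regex_parser"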