precommit

This commit is contained in:
Xi Yan 2025-03-18 20:48:03 -07:00
parent 205a50f10b
commit bf135f38b1
4 changed files with 11 additions and 11 deletions

View file

@ -8548,7 +8548,7 @@
},
"additionalProperties": false,
"title": "EvaluationTask",
"description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
"description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
},
"GradeRequest": {
"type": "object",

View file

@ -5927,7 +5927,7 @@ components:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when
you have a curated dataset and have settled on the graders. - `dataset_id`
and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
Use this when you have datasets and / or are iterating on your graders. -
Use this when you have datasets and / or are iterating on your graders. -
`data_source` and `grader_ids`: Run evaluation task against a data source
(e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are
early in your evaluation cycle and experimenting much more with your data

View file

@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
"""
A task for evaluation. To specify a task, one of the following must be provided:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.
:param benchmark_id: The benchmark ID to evaluate.

View file

@ -20,7 +20,7 @@ from typing import (
from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.resource import Resource
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .graders import * # noqa: F401 F403
@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):
# Grader variant tagged "llm": a BaseModel subclass with a fixed `type` tag
# and a single `llm` field holding its parameters.
@json_schema_type
class LlmGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.llm.value] = GraderType.llm.value
type: Literal["llm"] = "llm"
# Parameters for this variant, typed as LlmGraderParams.
llm: LlmGraderParams
# Grader variant tagged "regex_parser": a BaseModel subclass with a fixed
# `type` tag and a single `regex_parser` field holding its parameters.
@json_schema_type
class RegexParserGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value
type: Literal["regex_parser"] = "regex_parser"
# Parameters for this variant, typed as RegexParserGraderParams.
regex_parser: RegexParserGraderParams
# Grader variant tagged "equality": a BaseModel subclass with a fixed `type`
# tag and a single `equality` field holding its parameters.
@json_schema_type
class EqualityGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.equality.value] = GraderType.equality.value
type: Literal["equality"] = "equality"
# Parameters for this variant, typed as BasicGraderParams.
equality: BasicGraderParams
# Grader variant tagged "subset_of": a BaseModel subclass with a fixed `type`
# tag and a single `subset_of` field holding its parameters.
@json_schema_type
class SubsetOfGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value
type: Literal["subset_of"] = "subset_of"
# Parameters for this variant, typed as BasicGraderParams.
subset_of: BasicGraderParams
# Grader variant tagged "factuality": a BaseModel subclass with a fixed `type`
# tag and a single `factuality` field holding its parameters.
@json_schema_type
class FactualityGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.factuality.value] = GraderType.factuality.value
type: Literal["factuality"] = "factuality"
# Parameters for this variant, typed as BasicGraderParams.
factuality: BasicGraderParams
# Grader variant tagged "faithfulness": a BaseModel subclass with a fixed
# `type` tag and a single `faithfulness` field holding its parameters.
@json_schema_type
class FaithfulnessGrader(BaseModel):
# NOTE(review): the two `type` lines below appear to be the before/after pair
# of this diff hunk (enum-derived value vs. plain string literal); only one
# belongs in the real file - confirm which side is current.
type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value
type: Literal["faithfulness"] = "faithfulness"
# Parameters for this variant, typed as BasicGraderParams.
faithfulness: BasicGraderParams
@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):
@json_schema_type
class Grader(CommonGraderFields, Resource):
type: Literal[ResourceType.grader.value] = ResourceType.grader.value
type: Literal["grader"] = "grader"
@property
def grader_id(self) -> str: