precommit

This commit is contained in:
Xi Yan 2025-03-18 20:48:03 -07:00
parent 205a50f10b
commit bf135f38b1
4 changed files with 11 additions and 11 deletions

View file

@@ -8548,7 +8548,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"title": "EvaluationTask", "title": "EvaluationTask",
"description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders." "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
}, },
"GradeRequest": { "GradeRequest": {
"type": "object", "type": "object",

View file

@@ -5927,7 +5927,7 @@ components:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when
you have a curated dataset and have settled on the graders. - `dataset_id` you have a curated dataset and have settled on the graders. - `dataset_id`
and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
Use this when you have datasets and / or are iterating on your graders. - Use this when you have datasets and / or are iterating on your graders. -
`data_source` and `grader_ids`: Run evaluation task against a data source `data_source` and `grader_ids`: Run evaluation task against a data source
(e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are
early in your evaluation cycle and experimenting much more with your data early in your evaluation cycle and experimenting much more with your data

View file

@@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
""" """
A task for evaluation. To specify a task, one of the following must be provided: A task for evaluation. To specify a task, one of the following must be provided:
- `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.
:param benchmark_id: The benchmark ID to evaluate. :param benchmark_id: The benchmark ID to evaluate.

View file

@@ -20,7 +20,7 @@ from typing import (
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .graders import * # noqa: F401 F403 from .graders import * # noqa: F401 F403
@@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):
@json_schema_type @json_schema_type
class LlmGrader(BaseModel): class LlmGrader(BaseModel):
type: Literal[GraderType.llm.value] = GraderType.llm.value type: Literal["llm"] = "llm"
llm: LlmGraderParams llm: LlmGraderParams
@json_schema_type @json_schema_type
class RegexParserGrader(BaseModel): class RegexParserGrader(BaseModel):
type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value type: Literal["regex_parser"] = "regex_parser"
regex_parser: RegexParserGraderParams regex_parser: RegexParserGraderParams
@json_schema_type @json_schema_type
class EqualityGrader(BaseModel): class EqualityGrader(BaseModel):
type: Literal[GraderType.equality.value] = GraderType.equality.value type: Literal["equality"] = "equality"
equality: BasicGraderParams equality: BasicGraderParams
@json_schema_type @json_schema_type
class SubsetOfGrader(BaseModel): class SubsetOfGrader(BaseModel):
type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value type: Literal["subset_of"] = "subset_of"
subset_of: BasicGraderParams subset_of: BasicGraderParams
@json_schema_type @json_schema_type
class FactualityGrader(BaseModel): class FactualityGrader(BaseModel):
type: Literal[GraderType.factuality.value] = GraderType.factuality.value type: Literal["factuality"] = "factuality"
factuality: BasicGraderParams factuality: BasicGraderParams
@json_schema_type @json_schema_type
class FaithfulnessGrader(BaseModel): class FaithfulnessGrader(BaseModel):
type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value type: Literal["faithfulness"] = "faithfulness"
faithfulness: BasicGraderParams faithfulness: BasicGraderParams
@@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):
@json_schema_type @json_schema_type
class Grader(CommonGraderFields, Resource): class Grader(CommonGraderFields, Resource):
type: Literal[ResourceType.grader.value] = ResourceType.grader.value type: Literal["grader"] = "grader"
@property @property
def grader_id(self) -> str: def grader_id(self) -> str: