forked from phoenix-oss/llama-stack-mirror

precommit

commit bf135f38b1
parent 205a50f10b

4 changed files with 11 additions and 11 deletions
docs/_static/llama-stack-spec.html (vendored): 2 changes
@@ -8548,7 +8548,7 @@
       },
       "additionalProperties": false,
       "title": "EvaluationTask",
-      "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
+      "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
     },
     "GradeRequest": {
       "type": "object",
docs/_static/llama-stack-spec.yaml (vendored): 2 changes
@@ -5927,7 +5927,7 @@ components:
       - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when
       you have a curated dataset and have settled on the graders. - `dataset_id`
       and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
-      Use this when you have datasets and / or are iterating on your graders. -
+      Use this when you have datasets and / or are iterating on your graders. -
       `data_source` and `grader_ids`: Run evaluation task against a data source
       (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are
       early in your evaluation cycle and experimenting much more with your data
@@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
     """
     A task for evaluation. To specify a task, one of the following must be provided:
     - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
-    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
+    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
     - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.

     :param benchmark_id: The benchmark ID to evaluate.
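For context, a minimal sketch of what the three documented ways of specifying an EvaluationTask could look like in practice. Only the field names (benchmark_id, dataset_id, data_source, grader_ids) come from the docstring above; the RowsDataSource stand-in, the mutual-exclusion validator, and the example IDs are illustrative assumptions, not code from this commit.

# Sketch of EvaluationTask usage per the docstring above; shapes and IDs are assumed.
from pydantic import BaseModel, model_validator


class RowsDataSource(BaseModel):  # assumed stand-in for the real data source types
    rows: list[dict]


class EvaluationTask(BaseModel):
    benchmark_id: str | None = None
    dataset_id: str | None = None
    data_source: RowsDataSource | None = None
    grader_ids: list[str] | None = None

    @model_validator(mode="after")
    def _check_exactly_one_way(self) -> "EvaluationTask":
        # Exactly one of the three documented combinations should be supplied.
        ways = [
            self.benchmark_id is not None,
            self.dataset_id is not None and self.grader_ids is not None,
            self.data_source is not None and self.grader_ids is not None,
        ]
        if sum(ways) != 1:
            raise ValueError(
                "provide benchmark_id, or dataset_id + grader_ids, "
                "or data_source + grader_ids"
            )
        return self


# The three documented ways to specify a task:
by_benchmark = EvaluationTask(benchmark_id="mmlu::v1")
by_dataset = EvaluationTask(dataset_id="my-eval-set", grader_ids=["factuality"])
by_rows = EvaluationTask(
    data_source=RowsDataSource(rows=[{"input": "2+2", "expected": "4"}]),
    grader_ids=["equality"],
)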
@@ -20,7 +20,7 @@ from typing import (
 from pydantic import BaseModel, Field

 from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.resource import Resource
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 from .graders import *  # noqa: F401 F403
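The dropped ResourceType import follows from the final hunk below, where Literal[ResourceType.grader.value] becomes Literal["grader"], leaving ResourceType unused in this module. The likely motivation for rewriting all of these tags as plain strings (an assumption; the commit message only says "precommit") is that static type checkers reject non-literal parameters to Literal, per PEP 586. A tiny illustration with a hypothetical enum:

# PEP 586: Literal[...] must be parameterized with explicit literal values.
from enum import Enum
from typing import Literal


class GraderType(Enum):  # hypothetical enum for illustration
    llm = "llm"


# Flagged by mypy/pyright: GraderType.llm.value is an attribute expression,
# not a literal, even though it evaluates to "llm" at runtime.
# BadTag = Literal[GraderType.llm.value]

# Accepted: the value is spelled out as a string literal.
GoodTag = Literal["llm"]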
@@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):

 @json_schema_type
 class LlmGrader(BaseModel):
-    type: Literal[GraderType.llm.value] = GraderType.llm.value
+    type: Literal["llm"] = "llm"
     llm: LlmGraderParams


 @json_schema_type
 class RegexParserGrader(BaseModel):
-    type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value
+    type: Literal["regex_parser"] = "regex_parser"
     regex_parser: RegexParserGraderParams


 @json_schema_type
 class EqualityGrader(BaseModel):
-    type: Literal[GraderType.equality.value] = GraderType.equality.value
+    type: Literal["equality"] = "equality"
     equality: BasicGraderParams


 @json_schema_type
 class SubsetOfGrader(BaseModel):
-    type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value
+    type: Literal["subset_of"] = "subset_of"
     subset_of: BasicGraderParams


 @json_schema_type
 class FactualityGrader(BaseModel):
-    type: Literal[GraderType.factuality.value] = GraderType.factuality.value
+    type: Literal["factuality"] = "factuality"
     factuality: BasicGraderParams


 @json_schema_type
 class FaithfulnessGrader(BaseModel):
-    type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value
+    type: Literal["faithfulness"] = "faithfulness"
     faithfulness: BasicGraderParams
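A minimal sketch of how these string-literal type tags can drive a Pydantic discriminated union, using the same class and field names as the hunk above; the params models are assumed stand-ins and the union itself is illustrative, not the registered schema from the codebase.

# Sketch: the "type" field acts as the discriminator, so validation
# dispatches on the tag. Params models are assumed shapes.
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class LlmGraderParams(BaseModel):  # assumed shape, not shown in this hunk
    model: str
    prompt: str


class RegexParserGraderParams(BaseModel):  # assumed shape, not shown in this hunk
    parsing_regexes: list[str]


class LlmGrader(BaseModel):
    type: Literal["llm"] = "llm"
    llm: LlmGraderParams


class RegexParserGrader(BaseModel):
    type: Literal["regex_parser"] = "regex_parser"
    regex_parser: RegexParserGraderParams


GraderDefinition = Annotated[
    Union[LlmGrader, RegexParserGrader],
    Field(discriminator="type"),
]

parsed = TypeAdapter(GraderDefinition).validate_python(
    {"type": "regex_parser", "regex_parser": {"parsing_regexes": [r"\d+"]}}
)
assert isinstance(parsed, RegexParserGrader)

Because GraderType.llm.value and "llm" are the same string, the runtime behavior and generated schema stay the same; only the static spelling of the tag changes.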
@@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):

 @json_schema_type
 class Grader(CommonGraderFields, Resource):
-    type: Literal[ResourceType.grader.value] = ResourceType.grader.value
+    type: Literal["grader"] = "grader"

     @property
     def grader_id(self) -> str:
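The same pattern applies to the Grader resource itself. A small sketch of how the grader_id property might relate to the underlying Resource fields; the Resource and CommonGraderFields stand-ins and the property body are assumptions (the real base classes live in llama_stack.apis.resource and are not shown in this diff).

# Sketch only: stand-in base classes; the real ones are not part of this diff.
from typing import Literal

from pydantic import BaseModel


class Resource(BaseModel):  # stand-in for llama_stack.apis.resource.Resource
    identifier: str
    provider_id: str


class CommonGraderFields(BaseModel):  # stand-in; real fields not shown here
    description: str | None = None


class Grader(CommonGraderFields, Resource):
    type: Literal["grader"] = "grader"

    @property
    def grader_id(self) -> str:
        # Assumed body: grader_id as a domain-specific alias for the generic identifier.
        return self.identifier


g = Grader(identifier="factuality-v1", provider_id="inline::basic")
print(g.grader_id)  # "factuality-v1"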