From bf135f38b1e62a3cbf88ff6b5e3836a545c4cdc3 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Tue, 18 Mar 2025 20:48:03 -0700
Subject: [PATCH] precommit

---
 docs/_static/llama-stack-spec.html        |  2 +-
 docs/_static/llama-stack-spec.yaml        |  2 +-
 llama_stack/apis/evaluation/evaluation.py |  2 +-
 llama_stack/apis/graders/graders.py       | 16 ++++++++--------
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 09d4cb805..0f223b51b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -8548,7 +8548,7 @@
             },
             "additionalProperties": false,
             "title": "EvaluationTask",
-            "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
+            "description": "A task for evaluation. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders. - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders."
         },
         "GradeRequest": {
             "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 72361c50e..7c4ea81b8 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -5927,7 +5927,7 @@ components:
       - `benchmark_id`: Run evaluation task against a benchmark_id. Use this
       when you have a curated dataset and have settled on the graders. -
      `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids.
-      Use this when you have datasets and / or are iterating on your graders. - 
+      Use this when you have datasets and / or are iterating on your graders. -
       `data_source` and `grader_ids`: Run evaluation task against a data
       source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when
       you are early in your evaluation cycle and experimenting much more with your data
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index e1f02dbae..269004b26 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -52,7 +52,7 @@ class EvaluationTask(BaseModel):
     """
     A task for evaluation. To specify a task, one of the following must be provided:
     - `benchmark_id`: Run evaluation task against a benchmark_id. Use this when you have a curated dataset and have settled on the graders.
-    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders. 
+    - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids. Use this when you have datasets and / or are iterating on your graders.
     - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids. Prefer this when you are early in your evaluation cycle and experimenting much more with your data and graders.
 
     :param benchmark_id: The benchmark ID to evaluate.
diff --git a/llama_stack/apis/graders/graders.py b/llama_stack/apis/graders/graders.py
index 23c870e27..31e03b6d1 100644
--- a/llama_stack/apis/graders/graders.py
+++ b/llama_stack/apis/graders/graders.py
@@ -20,7 +20,7 @@ from typing import (
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.datasets import DatasetPurpose
-from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.apis.resource import Resource
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 from .graders import *  # noqa: F401 F403
@@ -96,37 +96,37 @@ class RegexParserGraderParams(BaseModel):
 
 @json_schema_type
 class LlmGrader(BaseModel):
-    type: Literal[GraderType.llm.value] = GraderType.llm.value
+    type: Literal["llm"] = "llm"
     llm: LlmGraderParams
 
 
 @json_schema_type
 class RegexParserGrader(BaseModel):
-    type: Literal[GraderType.regex_parser.value] = GraderType.regex_parser.value
+    type: Literal["regex_parser"] = "regex_parser"
     regex_parser: RegexParserGraderParams
 
 
 @json_schema_type
 class EqualityGrader(BaseModel):
-    type: Literal[GraderType.equality.value] = GraderType.equality.value
+    type: Literal["equality"] = "equality"
     equality: BasicGraderParams
 
 
 @json_schema_type
 class SubsetOfGrader(BaseModel):
-    type: Literal[GraderType.subset_of.value] = GraderType.subset_of.value
+    type: Literal["subset_of"] = "subset_of"
     subset_of: BasicGraderParams
 
 
 @json_schema_type
 class FactualityGrader(BaseModel):
-    type: Literal[GraderType.factuality.value] = GraderType.factuality.value
+    type: Literal["factuality"] = "factuality"
     factuality: BasicGraderParams
 
 
 @json_schema_type
 class FaithfulnessGrader(BaseModel):
-    type: Literal[GraderType.faithfulness.value] = GraderType.faithfulness.value
+    type: Literal["faithfulness"] = "faithfulness"
     faithfulness: BasicGraderParams
 
 
@@ -157,7 +157,7 @@ class CommonGraderFields(BaseModel):
 
 @json_schema_type
 class Grader(CommonGraderFields, Resource):
-    type: Literal[ResourceType.grader.value] = ResourceType.grader.value
+    type: Literal["grader"] = "grader"
 
     @property
     def grader_id(self) -> str:
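
A note on the Literal change above, for context: Pydantic resolves discriminated unions from the literal value of the `type` field, and a plain string literal such as Literal["llm"] behaves the same at runtime as Literal[GraderType.llm.value] while keeping the generated JSON schema a bare string constant. The sketch below is illustrative only: the union alias, the parameter fields, and the sample payload are assumptions for this note, not code from this repository.

from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter


class LlmGraderParams(BaseModel):
    # Hypothetical fields; the real LlmGraderParams in graders.py may differ.
    model: str
    prompt: str


class BasicGraderParams(BaseModel):
    # Hypothetical stand-in for the shared basic-grader params.
    aggregation_functions: list[str] = []


class LlmGrader(BaseModel):
    # Plain string literal on the discriminator field, as in this patch.
    type: Literal["llm"] = "llm"
    llm: LlmGraderParams


class EqualityGrader(BaseModel):
    type: Literal["equality"] = "equality"
    equality: BasicGraderParams


# Assumed union over grader variants, discriminated on the `type` literal.
GraderDefinition = Annotated[Union[LlmGrader, EqualityGrader], Field(discriminator="type")]

# Pydantic routes the payload to LlmGrader from the "type" value alone.
grader = TypeAdapter(GraderDefinition).validate_python(
    {"type": "llm", "llm": {"model": "llama-3.3-70b", "prompt": "Grade the answer."}}
)
assert isinstance(grader, LlmGrader)

On the EvaluationTask docstring the patch touches: the three options it lists are mutually exclusive ways to specify a task, e.g. EvaluationTask(benchmark_id="mmlu") once the benchmark is curated, or EvaluationTask(dataset_id="my-qa-set", grader_ids=["equality"]) while still iterating on graders (the IDs here are made up for illustration).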