forked from phoenix-oss/llama-stack-mirror
feat(eval api): (2.2/n) delete eval / scoring / scoring_fn apis (#1700)
# What does this PR do?

- To simplify the migration, delete the existing `eval/scoring/scoring_function` APIs. This will leave a number of broken implementations behind. The planned sequence is:
  1. Migrate the benchmark graders.
  2. Clean up the existing scoring functions.
- Add a skeleton evaluation implementation so that the tests pass.

## Test Plan

Tested in the following PRs.

[//]: # (## Documentation)
This commit is contained in:
parent
0048274ec0
commit
c1d18283d2
113 changed files with 408 additions and 3900 deletions
|
@ -5,14 +5,12 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from llama_stack.apis.common.type_system import (
|
||||
ChatCompletionInputType,
|
||||
CompletionInputType,
|
||||
StringType,
|
||||
)
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
|
||||
|
||||
class ColumnName(Enum):
|
||||
|
@ -75,29 +73,31 @@ VALID_SCHEMAS_FOR_EVAL = [
|
|||
]
|
||||
|
||||
|
||||
def get_valid_schemas(api_str: str):
|
||||
if api_str == Api.scoring.value:
|
||||
return VALID_SCHEMAS_FOR_SCORING
|
||||
elif api_str == Api.eval.value:
|
||||
return VALID_SCHEMAS_FOR_EVAL
|
||||
else:
|
||||
raise ValueError(f"Invalid API string: {api_str}")
|
||||
# TODO(xiyan): add this back
|
||||
|
||||
# def get_valid_schemas(api_str: str):
|
||||
# if api_str == Api.scoring.value:
|
||||
# return VALID_SCHEMAS_FOR_SCORING
|
||||
# elif api_str == Api.eval.value:
|
||||
# return VALID_SCHEMAS_FOR_EVAL
|
||||
# else:
|
||||
# raise ValueError(f"Invalid API string: {api_str}")
|
||||
|
||||
|
||||
def validate_dataset_schema(
|
||||
dataset_schema: Dict[str, Any],
|
||||
expected_schemas: List[Dict[str, Any]],
|
||||
):
|
||||
if dataset_schema not in expected_schemas:
|
||||
raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
|
||||
# def validate_dataset_schema(
|
||||
# dataset_schema: Dict[str, Any],
|
||||
# expected_schemas: List[Dict[str, Any]],
|
||||
# ):
|
||||
# if dataset_schema not in expected_schemas:
|
||||
# raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
|
||||
|
||||
|
||||
def validate_row_schema(
|
||||
input_row: Dict[str, Any],
|
||||
expected_schemas: List[Dict[str, Any]],
|
||||
):
|
||||
for schema in expected_schemas:
|
||||
if all(key in input_row for key in schema):
|
||||
return
|
||||
# def validate_row_schema(
|
||||
# input_row: Dict[str, Any],
|
||||
# expected_schemas: List[Dict[str, Any]],
|
||||
# ):
|
||||
# for schema in expected_schemas:
|
||||
# if all(key in input_row for key in schema):
|
||||
# return
|
||||
|
||||
raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
|
||||
# raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue