better params fields

This commit is contained in:
Xi Yan 2025-03-12 21:31:22 -07:00
parent 93c131ed5f
commit a7abe6df74
3 changed files with 763 additions and 571 deletions

View file

@ -6388,90 +6388,114 @@
"AnswerCorrectnessScoringFnParams": { "AnswerCorrectnessScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "answer_correctness", "const": "answer_correctness",
"default": "answer_correctness" "default": "answer_correctness"
},
"answer_correctness": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"answer_correctness"
], ],
"title": "AnswerCorrectnessScoringFnParams" "title": "AnswerCorrectnessScoringFnParams"
}, },
"AnswerRelevancyScoringFnParams": { "AnswerRelevancyScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "answer_relevancy", "const": "answer_relevancy",
"default": "answer_relevancy" "default": "answer_relevancy"
},
"answer_relevancy": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"answer_relevancy"
], ],
"title": "AnswerRelevancyScoringFnParams" "title": "AnswerRelevancyScoringFnParams"
}, },
"AnswerSimilarityScoringFnParams": { "AnswerSimilarityScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "answer_similarity", "const": "answer_similarity",
"default": "answer_similarity" "default": "answer_similarity"
},
"answer_similarity": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"answer_similarity"
], ],
"title": "AnswerSimilarityScoringFnParams" "title": "AnswerSimilarityScoringFnParams"
}, },
@ -6505,150 +6529,234 @@
"ContextEntityRecallScoringFnParams": { "ContextEntityRecallScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "context_entity_recall", "const": "context_entity_recall",
"default": "context_entity_recall" "default": "context_entity_recall"
},
"context_entity_recall": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_entity_recall"
], ],
"title": "ContextEntityRecallScoringFnParams" "title": "ContextEntityRecallScoringFnParams"
}, },
"ContextPrecisionScoringFnParams": { "ContextPrecisionScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "context_precision", "const": "context_precision",
"default": "context_precision" "default": "context_precision"
},
"context_precision": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_precision"
], ],
"title": "ContextPrecisionScoringFnParams" "title": "ContextPrecisionScoringFnParams"
}, },
"ContextRecallScoringFnParams": { "ContextRecallScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "context_recall", "const": "context_recall",
"default": "context_recall" "default": "context_recall"
},
"context_recall": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_recall"
], ],
"title": "ContextRecallScoringFnParams" "title": "ContextRecallScoringFnParams"
}, },
"ContextRelevancyScoringFnParams": { "ContextRelevancyScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "context_relevancy", "const": "context_relevancy",
"default": "context_relevancy" "default": "context_relevancy"
},
"context_relevancy": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_relevancy"
], ],
"title": "ContextRelevancyScoringFnParams" "title": "ContextRelevancyScoringFnParams"
}, },
"CustomLLMAsJudgeScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
},
"custom_llm_as_judge": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
},
"judge_model": {
"type": "string"
},
"prompt_template": {
"type": "string"
},
"judge_score_regexes": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"type",
"judge_model"
],
"title": "CustomLLMAsJudgeScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"custom_llm_as_judge"
],
"title": "CustomLLMAsJudgeScoringFnParams"
},
"EqualityScoringFnParams": { "EqualityScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "equality", "const": "equality",
"default": "equality" "default": "equality"
},
"equality": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"equality"
], ],
"title": "EqualityScoringFnParams" "title": "EqualityScoringFnParams"
}, },
@ -6672,104 +6780,78 @@
"FactualityScoringFnParams": { "FactualityScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "factuality", "const": "factuality",
"default": "factuality" "default": "factuality"
},
"factuality": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"factuality"
], ],
"title": "FactualityScoringFnParams" "title": "FactualityScoringFnParams"
}, },
"FaithfulnessScoringFnParams": { "FaithfulnessScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "faithfulness", "const": "faithfulness",
"default": "faithfulness" "default": "faithfulness"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "FaithfulnessScoringFnParams"
},
"LLMAsJudgeScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
}, },
"judge_model": { "faithfulness": {
"type": "string" "type": "object",
}, "properties": {
"prompt_template": { "aggregation_functions": {
"type": "string" "type": "array",
}, "items": {
"judge_score_regexes": { "type": "string",
"type": "array", "enum": [
"items": { "average",
"type": "string" "median",
} "categorical_count",
}, "accuracy"
"aggregation_functions": { ],
"type": "array", "title": "AggregationFunctionType",
"items": { "description": "A type of aggregation function."
"type": "string", },
"enum": [ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
"average", }
"median", },
"categorical_count", "additionalProperties": false,
"accuracy" "title": "BasicScoringFnParamsFields"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type", "type",
"judge_model" "faithfulness"
], ],
"title": "LLMAsJudgeScoringFnParams" "title": "FaithfulnessScoringFnParams"
}, },
"ModelCandidate": { "ModelCandidate": {
"type": "object", "type": "object",
@ -6804,83 +6886,103 @@
"RegexParserMathScoringFnParams": { "RegexParserMathScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "regex_parser_math_response", "const": "regex_parser_math_response",
"default": "regex_parser_math_response" "default": "regex_parser_math_response"
},
"regex_parser_math_response": {
"type": "object",
"properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"required": [
"parsing_regexes"
],
"title": "RegexParserScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"parsing_regexes", "type",
"type" "regex_parser_math_response"
], ],
"title": "RegexParserMathScoringFnParams" "title": "RegexParserMathScoringFnParams"
}, },
"RegexParserScoringFnParams": { "RegexParserScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "regex_parser", "const": "regex_parser",
"default": "regex_parser" "default": "regex_parser"
},
"regex_parser": {
"type": "object",
"properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"required": [
"parsing_regexes"
],
"title": "RegexParserScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"parsing_regexes", "type",
"type" "regex_parser"
], ],
"title": "RegexParserScoringFnParams" "title": "RegexParserScoringFnParams"
}, },
"ScoringFnParams": { "ScoringFnParams": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
}, },
{ {
"$ref": "#/components/schemas/RegexParserScoringFnParams" "$ref": "#/components/schemas/RegexParserScoringFnParams"
@ -6925,7 +7027,7 @@
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
"regex_parser": "#/components/schemas/RegexParserScoringFnParams", "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
"regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams", "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
"equality": "#/components/schemas/EqualityScoringFnParams", "equality": "#/components/schemas/EqualityScoringFnParams",
@ -6945,30 +7047,38 @@
"SubsetOfcoringFnParams": { "SubsetOfcoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "subset_of", "const": "subset_of",
"default": "subset_of" "default": "subset_of"
},
"subset_of": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"subset_of"
], ],
"title": "SubsetOfcoringFnParams" "title": "SubsetOfcoringFnParams"
}, },

View file

@ -4451,77 +4451,95 @@ components:
AnswerCorrectnessScoringFnParams: AnswerCorrectnessScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: answer_correctness const: answer_correctness
default: answer_correctness default: answer_correctness
answer_correctness:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- answer_correctness
title: AnswerCorrectnessScoringFnParams title: AnswerCorrectnessScoringFnParams
AnswerRelevancyScoringFnParams: AnswerRelevancyScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: answer_relevancy const: answer_relevancy
default: answer_relevancy default: answer_relevancy
answer_relevancy:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- answer_relevancy
title: AnswerRelevancyScoringFnParams title: AnswerRelevancyScoringFnParams
AnswerSimilarityScoringFnParams: AnswerSimilarityScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: answer_similarity const: answer_similarity
default: answer_similarity default: answer_similarity
answer_similarity:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- answer_similarity
title: AnswerSimilarityScoringFnParams title: AnswerSimilarityScoringFnParams
BenchmarkConfig: BenchmarkConfig:
type: object type: object
@ -4551,127 +4569,189 @@ components:
ContextEntityRecallScoringFnParams: ContextEntityRecallScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_entity_recall const: context_entity_recall
default: context_entity_recall default: context_entity_recall
context_entity_recall:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_entity_recall
title: ContextEntityRecallScoringFnParams title: ContextEntityRecallScoringFnParams
ContextPrecisionScoringFnParams: ContextPrecisionScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_precision const: context_precision
default: context_precision default: context_precision
context_precision:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_precision
title: ContextPrecisionScoringFnParams title: ContextPrecisionScoringFnParams
ContextRecallScoringFnParams: ContextRecallScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_recall const: context_recall
default: context_recall default: context_recall
context_recall:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_recall
title: ContextRecallScoringFnParams title: ContextRecallScoringFnParams
ContextRelevancyScoringFnParams: ContextRelevancyScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_relevancy const: context_relevancy
default: context_relevancy default: context_relevancy
context_relevancy:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_relevancy
title: ContextRelevancyScoringFnParams title: ContextRelevancyScoringFnParams
CustomLLMAsJudgeScoringFnParams:
type: object
properties:
type:
type: string
const: custom_llm_as_judge
default: custom_llm_as_judge
custom_llm_as_judge:
type: object
properties:
type:
type: string
const: custom_llm_as_judge
default: custom_llm_as_judge
judge_model:
type: string
prompt_template:
type: string
judge_score_regexes:
type: array
items:
type: string
additionalProperties: false
required:
- type
- judge_model
title: CustomLLMAsJudgeScoringFnParamsFields
additionalProperties: false
required:
- type
- custom_llm_as_judge
title: CustomLLMAsJudgeScoringFnParams
EqualityScoringFnParams: EqualityScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: equality const: equality
default: equality default: equality
equality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- equality
title: EqualityScoringFnParams title: EqualityScoringFnParams
EvalCandidate: EvalCandidate:
oneOf: oneOf:
@ -4685,84 +4765,65 @@ components:
FactualityScoringFnParams: FactualityScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: factuality const: factuality
default: factuality default: factuality
factuality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- factuality
title: FactualityScoringFnParams title: FactualityScoringFnParams
FaithfulnessScoringFnParams: FaithfulnessScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: faithfulness const: faithfulness
default: faithfulness default: faithfulness
faithfulness:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- faithfulness
title: FaithfulnessScoringFnParams title: FaithfulnessScoringFnParams
LLMAsJudgeScoringFnParams:
type: object
properties:
type:
type: string
const: custom_llm_as_judge
default: custom_llm_as_judge
judge_model:
type: string
prompt_template:
type: string
judge_score_regexes:
type: array
items:
type: string
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- type
- judge_model
title: LLMAsJudgeScoringFnParams
ModelCandidate: ModelCandidate:
type: object type: object
properties: properties:
@ -4791,70 +4852,84 @@ components:
RegexParserMathScoringFnParams: RegexParserMathScoringFnParams:
type: object type: object
properties: properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: regex_parser_math_response const: regex_parser_math_response
default: regex_parser_math_response default: regex_parser_math_response
regex_parser_math_response:
type: object
properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
required:
- parsing_regexes
title: RegexParserScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- parsing_regexes
- type - type
- regex_parser_math_response
title: RegexParserMathScoringFnParams title: RegexParserMathScoringFnParams
RegexParserScoringFnParams: RegexParserScoringFnParams:
type: object type: object
properties: properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: regex_parser const: regex_parser
default: regex_parser default: regex_parser
regex_parser:
type: object
properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
required:
- parsing_regexes
title: RegexParserScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- parsing_regexes
- type - type
- regex_parser
title: RegexParserScoringFnParams title: RegexParserScoringFnParams
ScoringFnParams: ScoringFnParams:
oneOf: oneOf:
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
- $ref: '#/components/schemas/RegexParserScoringFnParams' - $ref: '#/components/schemas/RegexParserScoringFnParams'
- $ref: '#/components/schemas/RegexParserMathScoringFnParams' - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
- $ref: '#/components/schemas/EqualityScoringFnParams' - $ref: '#/components/schemas/EqualityScoringFnParams'
@ -4871,7 +4946,7 @@ components:
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams'
regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams' regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
equality: '#/components/schemas/EqualityScoringFnParams' equality: '#/components/schemas/EqualityScoringFnParams'
@ -4888,27 +4963,33 @@ components:
SubsetOfcoringFnParams: SubsetOfcoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: subset_of const: subset_of
default: subset_of default: subset_of
subset_of:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- subset_of
title: SubsetOfcoringFnParams title: SubsetOfcoringFnParams
EvaluateRowsRequest: EvaluateRowsRequest:
type: object type: object

View file

@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
accuracy = "accuracy" accuracy = "accuracy"
class BasicScoringFnParamsCommon(BaseModel): class BasicScoringFnParamsFields(BaseModel):
""" """
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
""" """
@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
) )
class RegexParserScoringFnParamsCommon(BaseModel): class RegexParserScoringFnParamsFields(BaseModel):
""" """
:param parsing_regexes: (Optional) Regexes to extract the answer from generated response. :param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
default_factory=list, default_factory=list,
) )
class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
@json_schema_type
class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
type: Literal["regex_parser"] = "regex_parser"
@json_schema_type
class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
@json_schema_type
class EqualityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["equality"] = "equality"
@json_schema_type
class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
type: Literal["subset_of"] = "subset_of"
@json_schema_type
class FactualityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["factuality"] = "factuality"
@json_schema_type
class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["faithfulness"] = "faithfulness"
@json_schema_type
class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_correctness"] = "answer_correctness"
@json_schema_type
class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_relevancy"] = "answer_relevancy"
@json_schema_type
class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_similarity"] = "answer_similarity"
@json_schema_type
class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_entity_recall"] = "context_entity_recall"
@json_schema_type
class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_precision"] = "context_precision"
@json_schema_type
class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_recall"] = "context_recall"
@json_schema_type
class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_relevancy"] = "context_relevancy"
@json_schema_type
class LLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
judge_model: str judge_model: str
prompt_template: Optional[str] = None prompt_template: Optional[str] = None
@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
description="Regexes to extract the answer from generated response", description="Regexes to extract the answer from generated response",
default_factory=list, default_factory=list,
) )
aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
description="Aggregation functions to apply to the scores of each row", @json_schema_type
default_factory=list, class RegexParserScoringFnParams(BaseModel):
) type: Literal["regex_parser"] = "regex_parser"
regex_parser: RegexParserScoringFnParamsFields
@json_schema_type
class RegexParserMathScoringFnParams(BaseModel):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
regex_parser_math_response: RegexParserScoringFnParamsFields
@json_schema_type
class EqualityScoringFnParams(BaseModel):
type: Literal["equality"] = "equality"
equality: BasicScoringFnParamsFields
@json_schema_type
class SubsetOfcoringFnParams(BaseModel):
type: Literal["subset_of"] = "subset_of"
subset_of: BasicScoringFnParamsFields
@json_schema_type
class FactualityScoringFnParams(BaseModel):
type: Literal["factuality"] = "factuality"
factuality: BasicScoringFnParamsFields
@json_schema_type
class FaithfulnessScoringFnParams(BaseModel):
type: Literal["faithfulness"] = "faithfulness"
faithfulness: BasicScoringFnParamsFields
@json_schema_type
class AnswerCorrectnessScoringFnParams(BaseModel):
type: Literal["answer_correctness"] = "answer_correctness"
answer_correctness: BasicScoringFnParamsFields
@json_schema_type
class AnswerRelevancyScoringFnParams(BaseModel):
type: Literal["answer_relevancy"] = "answer_relevancy"
answer_relevancy: BasicScoringFnParamsFields
@json_schema_type
class AnswerSimilarityScoringFnParams(BaseModel):
type: Literal["answer_similarity"] = "answer_similarity"
answer_similarity: BasicScoringFnParamsFields
@json_schema_type
class ContextEntityRecallScoringFnParams(BaseModel):
type: Literal["context_entity_recall"] = "context_entity_recall"
context_entity_recall: BasicScoringFnParamsFields
@json_schema_type
class ContextPrecisionScoringFnParams(BaseModel):
type: Literal["context_precision"] = "context_precision"
context_precision: BasicScoringFnParamsFields
@json_schema_type
class ContextRecallScoringFnParams(BaseModel):
type: Literal["context_recall"] = "context_recall"
context_recall: BasicScoringFnParamsFields
@json_schema_type
class ContextRelevancyScoringFnParams(BaseModel):
type: Literal["context_relevancy"] = "context_relevancy"
context_relevancy: BasicScoringFnParamsFields
@json_schema_type
class CustomLLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields
ScoringFnParams = register_schema( ScoringFnParams = register_schema(
Annotated[ Annotated[
Union[ Union[
LLMAsJudgeScoringFnParams, CustomLLMAsJudgeScoringFnParams,
RegexParserScoringFnParams, RegexParserScoringFnParams,
RegexParserMathScoringFnParams, RegexParserMathScoringFnParams,
EqualityScoringFnParams, EqualityScoringFnParams,