better params fields

This commit is contained in:
Xi Yan 2025-03-12 21:31:22 -07:00
parent 93c131ed5f
commit a7abe6df74
3 changed files with 763 additions and 571 deletions

View file

@ -6386,6 +6386,14 @@
"description": "An agent candidate for evaluation." "description": "An agent candidate for evaluation."
}, },
"AnswerCorrectnessScoringFnParams": { "AnswerCorrectnessScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "answer_correctness",
"default": "answer_correctness"
},
"answer_correctness": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6402,50 +6410,28 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "answer_correctness",
"default": "answer_correctness"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"answer_correctness"
], ],
"title": "AnswerCorrectnessScoringFnParams" "title": "AnswerCorrectnessScoringFnParams"
}, },
"AnswerRelevancyScoringFnParams": { "AnswerRelevancyScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "answer_relevancy", "const": "answer_relevancy",
"default": "answer_relevancy" "default": "answer_relevancy"
}
}, },
"additionalProperties": false, "answer_relevancy": {
"required": [
"type"
],
"title": "AnswerRelevancyScoringFnParams"
},
"AnswerSimilarityScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6462,16 +6448,54 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "answer_similarity",
"default": "answer_similarity"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"answer_relevancy"
],
"title": "AnswerRelevancyScoringFnParams"
},
"AnswerSimilarityScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "answer_similarity",
"default": "answer_similarity"
},
"answer_similarity": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"answer_similarity"
], ],
"title": "AnswerSimilarityScoringFnParams" "title": "AnswerSimilarityScoringFnParams"
}, },
@ -6503,6 +6527,14 @@
"description": "A benchmark configuration for evaluation." "description": "A benchmark configuration for evaluation."
}, },
"ContextEntityRecallScoringFnParams": { "ContextEntityRecallScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "context_entity_recall",
"default": "context_entity_recall"
},
"context_entity_recall": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6519,20 +6551,28 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "context_entity_recall",
"default": "context_entity_recall"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_entity_recall"
], ],
"title": "ContextEntityRecallScoringFnParams" "title": "ContextEntityRecallScoringFnParams"
}, },
"ContextPrecisionScoringFnParams": { "ContextPrecisionScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "context_precision",
"default": "context_precision"
},
"context_precision": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6549,20 +6589,28 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "context_precision",
"default": "context_precision"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_precision"
], ],
"title": "ContextPrecisionScoringFnParams" "title": "ContextPrecisionScoringFnParams"
}, },
"ContextRecallScoringFnParams": { "ContextRecallScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "context_recall",
"default": "context_recall"
},
"context_recall": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6579,50 +6627,28 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "context_recall",
"default": "context_recall"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_recall"
], ],
"title": "ContextRecallScoringFnParams" "title": "ContextRecallScoringFnParams"
}, },
"ContextRelevancyScoringFnParams": { "ContextRelevancyScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "context_relevancy", "const": "context_relevancy",
"default": "context_relevancy" "default": "context_relevancy"
}
}, },
"additionalProperties": false, "context_relevancy": {
"required": [
"type"
],
"title": "ContextRelevancyScoringFnParams"
},
"EqualityScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6639,16 +6665,98 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "equality",
"default": "equality"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"context_relevancy"
],
"title": "ContextRelevancyScoringFnParams"
},
"CustomLLMAsJudgeScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
},
"custom_llm_as_judge": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
},
"judge_model": {
"type": "string"
},
"prompt_template": {
"type": "string"
},
"judge_score_regexes": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"type",
"judge_model"
],
"title": "CustomLLMAsJudgeScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"custom_llm_as_judge"
],
"title": "CustomLLMAsJudgeScoringFnParams"
},
"EqualityScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "equality",
"default": "equality"
},
"equality": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"equality"
], ],
"title": "EqualityScoringFnParams" "title": "EqualityScoringFnParams"
}, },
@ -6670,6 +6778,14 @@
} }
}, },
"FactualityScoringFnParams": { "FactualityScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "factuality",
"default": "factuality"
},
"factuality": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6686,20 +6802,28 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "factuality",
"default": "factuality"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"factuality"
], ],
"title": "FactualityScoringFnParams" "title": "FactualityScoringFnParams"
}, },
"FaithfulnessScoringFnParams": { "FaithfulnessScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "faithfulness",
"default": "faithfulness"
},
"faithfulness": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6716,60 +6840,18 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "faithfulness",
"default": "faithfulness"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "title": "BasicScoringFnParamsFields"
"type"
],
"title": "FaithfulnessScoringFnParams"
},
"LLMAsJudgeScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "custom_llm_as_judge",
"default": "custom_llm_as_judge"
},
"judge_model": {
"type": "string"
},
"prompt_template": {
"type": "string"
},
"judge_score_regexes": {
"type": "array",
"items": {
"type": "string"
}
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type", "type",
"judge_model" "faithfulness"
], ],
"title": "LLMAsJudgeScoringFnParams" "title": "FaithfulnessScoringFnParams"
}, },
"ModelCandidate": { "ModelCandidate": {
"type": "object", "type": "object",
@ -6804,42 +6886,12 @@
"RegexParserMathScoringFnParams": { "RegexParserMathScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": { "type": {
"type": "string", "type": "string",
"const": "regex_parser_math_response", "const": "regex_parser_math_response",
"default": "regex_parser_math_response" "default": "regex_parser_math_response"
}
}, },
"additionalProperties": false, "regex_parser_math_response": {
"required": [
"parsing_regexes",
"type"
],
"title": "RegexParserMathScoringFnParams"
},
"RegexParserScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"parsing_regexes": { "parsing_regexes": {
@ -6863,24 +6915,74 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "regex_parser",
"default": "regex_parser"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"parsing_regexes", "parsing_regexes"
"type" ],
"title": "RegexParserScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"regex_parser_math_response"
],
"title": "RegexParserMathScoringFnParams"
},
"RegexParserScoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "regex_parser",
"default": "regex_parser"
},
"regex_parser": {
"type": "object",
"properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
},
"additionalProperties": false,
"required": [
"parsing_regexes"
],
"title": "RegexParserScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
"regex_parser"
], ],
"title": "RegexParserScoringFnParams" "title": "RegexParserScoringFnParams"
}, },
"ScoringFnParams": { "ScoringFnParams": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
}, },
{ {
"$ref": "#/components/schemas/RegexParserScoringFnParams" "$ref": "#/components/schemas/RegexParserScoringFnParams"
@ -6925,7 +7027,7 @@
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
"regex_parser": "#/components/schemas/RegexParserScoringFnParams", "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
"regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams", "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
"equality": "#/components/schemas/EqualityScoringFnParams", "equality": "#/components/schemas/EqualityScoringFnParams",
@ -6943,6 +7045,14 @@
} }
}, },
"SubsetOfcoringFnParams": { "SubsetOfcoringFnParams": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "subset_of",
"default": "subset_of"
},
"subset_of": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
@ -6959,16 +7069,16 @@
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}
}, },
"type": { "additionalProperties": false,
"type": "string", "title": "BasicScoringFnParamsFields"
"const": "subset_of",
"default": "subset_of"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type",
"subset_of"
], ],
"title": "SubsetOfcoringFnParams" "title": "SubsetOfcoringFnParams"
}, },

View file

@ -4451,54 +4451,42 @@ components:
AnswerCorrectnessScoringFnParams: AnswerCorrectnessScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: answer_correctness const: answer_correctness
default: answer_correctness default: answer_correctness
answer_correctness:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- answer_correctness
title: AnswerCorrectnessScoringFnParams title: AnswerCorrectnessScoringFnParams
AnswerRelevancyScoringFnParams: AnswerRelevancyScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: answer_relevancy const: answer_relevancy
default: answer_relevancy default: answer_relevancy
additionalProperties: false answer_relevancy:
required:
- type
title: AnswerRelevancyScoringFnParams
AnswerSimilarityScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions: aggregation_functions:
@ -4513,15 +4501,45 @@ components:
title: AggregationFunctionType title: AggregationFunctionType
description: A type of aggregation function. description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row.
not provided, no aggregation will be performed. If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
- answer_relevancy
title: AnswerRelevancyScoringFnParams
AnswerSimilarityScoringFnParams:
type: object
properties:
type: type:
type: string type: string
const: answer_similarity const: answer_similarity
default: answer_similarity default: answer_similarity
answer_similarity:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- answer_similarity
title: AnswerSimilarityScoringFnParams title: AnswerSimilarityScoringFnParams
BenchmarkConfig: BenchmarkConfig:
type: object type: object
@ -4551,188 +4569,135 @@ components:
ContextEntityRecallScoringFnParams: ContextEntityRecallScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_entity_recall const: context_entity_recall
default: context_entity_recall default: context_entity_recall
context_entity_recall:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_entity_recall
title: ContextEntityRecallScoringFnParams title: ContextEntityRecallScoringFnParams
ContextPrecisionScoringFnParams: ContextPrecisionScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_precision const: context_precision
default: context_precision default: context_precision
context_precision:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_precision
title: ContextPrecisionScoringFnParams title: ContextPrecisionScoringFnParams
ContextRecallScoringFnParams: ContextRecallScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_recall const: context_recall
default: context_recall default: context_recall
context_recall:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_recall
title: ContextRecallScoringFnParams title: ContextRecallScoringFnParams
ContextRelevancyScoringFnParams: ContextRelevancyScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: context_relevancy const: context_relevancy
default: context_relevancy default: context_relevancy
context_relevancy:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- context_relevancy
title: ContextRelevancyScoringFnParams title: ContextRelevancyScoringFnParams
EqualityScoringFnParams: CustomLLMAsJudgeScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: equality const: custom_llm_as_judge
default: equality default: custom_llm_as_judge
additionalProperties: false custom_llm_as_judge:
required:
- type
title: EqualityScoringFnParams
EvalCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
- $ref: '#/components/schemas/AgentCandidate'
discriminator:
propertyName: type
mapping:
model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate'
FactualityScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: factuality
default: factuality
additionalProperties: false
required:
- type
title: FactualityScoringFnParams
FaithfulnessScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: faithfulness
default: faithfulness
additionalProperties: false
required:
- type
title: FaithfulnessScoringFnParams
LLMAsJudgeScoringFnParams:
type: object type: object
properties: properties:
type: type:
@ -4747,6 +4712,26 @@ components:
type: array type: array
items: items:
type: string type: string
additionalProperties: false
required:
- type
- judge_model
title: CustomLLMAsJudgeScoringFnParamsFields
additionalProperties: false
required:
- type
- custom_llm_as_judge
title: CustomLLMAsJudgeScoringFnParams
EqualityScoringFnParams:
type: object
properties:
type:
type: string
const: equality
default: equality
equality:
type: object
properties:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
@ -4758,11 +4743,87 @@ components:
- accuracy - accuracy
title: AggregationFunctionType title: AggregationFunctionType
description: A type of aggregation function. description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false additionalProperties: false
required: required:
- type - type
- judge_model - equality
title: LLMAsJudgeScoringFnParams title: EqualityScoringFnParams
EvalCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
- $ref: '#/components/schemas/AgentCandidate'
discriminator:
propertyName: type
mapping:
model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate'
FactualityScoringFnParams:
type: object
properties:
type:
type: string
const: factuality
default: factuality
factuality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
- factuality
title: FactualityScoringFnParams
FaithfulnessScoringFnParams:
type: object
properties:
type:
type: string
const: faithfulness
default: faithfulness
faithfulness:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false
title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
- faithfulness
title: FaithfulnessScoringFnParams
ModelCandidate: ModelCandidate:
type: object type: object
properties: properties:
@ -4791,36 +4852,11 @@ components:
RegexParserMathScoringFnParams: RegexParserMathScoringFnParams:
type: object type: object
properties: properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type: type:
type: string type: string
const: regex_parser_math_response const: regex_parser_math_response
default: regex_parser_math_response default: regex_parser_math_response
additionalProperties: false regex_parser_math_response:
required:
- parsing_regexes
- type
title: RegexParserMathScoringFnParams
RegexParserScoringFnParams:
type: object type: object
properties: properties:
parsing_regexes: parsing_regexes:
@ -4841,20 +4877,59 @@ components:
title: AggregationFunctionType title: AggregationFunctionType
description: A type of aggregation function. description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row.
not provided, no aggregation will be performed. If not provided, no aggregation will be performed.
additionalProperties: false
required:
- parsing_regexes
title: RegexParserScoringFnParamsFields
additionalProperties: false
required:
- type
- regex_parser_math_response
title: RegexParserMathScoringFnParams
RegexParserScoringFnParams:
type: object
properties:
type: type:
type: string type: string
const: regex_parser const: regex_parser
default: regex_parser default: regex_parser
regex_parser:
type: object
properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >-
(Optional) Aggregation functions to apply to the scores of each row.
If not provided, no aggregation will be performed.
additionalProperties: false additionalProperties: false
required: required:
- parsing_regexes - parsing_regexes
title: RegexParserScoringFnParamsFields
additionalProperties: false
required:
- type - type
- regex_parser
title: RegexParserScoringFnParams title: RegexParserScoringFnParams
ScoringFnParams: ScoringFnParams:
oneOf: oneOf:
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
- $ref: '#/components/schemas/RegexParserScoringFnParams' - $ref: '#/components/schemas/RegexParserScoringFnParams'
- $ref: '#/components/schemas/RegexParserMathScoringFnParams' - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
- $ref: '#/components/schemas/EqualityScoringFnParams' - $ref: '#/components/schemas/EqualityScoringFnParams'
@ -4871,7 +4946,7 @@ components:
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams'
regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams' regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
equality: '#/components/schemas/EqualityScoringFnParams' equality: '#/components/schemas/EqualityScoringFnParams'
@ -4886,6 +4961,13 @@ components:
context_recall: '#/components/schemas/ContextRecallScoringFnParams' context_recall: '#/components/schemas/ContextRecallScoringFnParams'
context_relevancy: '#/components/schemas/ContextRelevancyScoringFnParams' context_relevancy: '#/components/schemas/ContextRelevancyScoringFnParams'
SubsetOfcoringFnParams: SubsetOfcoringFnParams:
type: object
properties:
type:
type: string
const: subset_of
default: subset_of
subset_of:
type: object type: object
properties: properties:
aggregation_functions: aggregation_functions:
@ -4900,15 +4982,14 @@ components:
title: AggregationFunctionType title: AggregationFunctionType
description: A type of aggregation function. description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row.
not provided, no aggregation will be performed. If not provided, no aggregation will be performed.
type: additionalProperties: false
type: string title: BasicScoringFnParamsFields
const: subset_of
default: subset_of
additionalProperties: false additionalProperties: false
required: required:
- type - type
- subset_of
title: SubsetOfcoringFnParams title: SubsetOfcoringFnParams
EvaluateRowsRequest: EvaluateRowsRequest:
type: object type: object

View file

@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
accuracy = "accuracy" accuracy = "accuracy"
class BasicScoringFnParamsCommon(BaseModel): class BasicScoringFnParamsFields(BaseModel):
""" """
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
""" """
@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
) )
class RegexParserScoringFnParamsCommon(BaseModel): class RegexParserScoringFnParamsFields(BaseModel):
""" """
:param parsing_regexes: (Optional) Regexes to extract the answer from generated response. :param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
default_factory=list, default_factory=list,
) )
class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
@json_schema_type
class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
type: Literal["regex_parser"] = "regex_parser"
@json_schema_type
class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
@json_schema_type
class EqualityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["equality"] = "equality"
@json_schema_type
class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
type: Literal["subset_of"] = "subset_of"
@json_schema_type
class FactualityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["factuality"] = "factuality"
@json_schema_type
class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["faithfulness"] = "faithfulness"
@json_schema_type
class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_correctness"] = "answer_correctness"
@json_schema_type
class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_relevancy"] = "answer_relevancy"
@json_schema_type
class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_similarity"] = "answer_similarity"
@json_schema_type
class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_entity_recall"] = "context_entity_recall"
@json_schema_type
class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_precision"] = "context_precision"
@json_schema_type
class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_recall"] = "context_recall"
@json_schema_type
class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_relevancy"] = "context_relevancy"
@json_schema_type
class LLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
judge_model: str judge_model: str
prompt_template: Optional[str] = None prompt_template: Optional[str] = None
@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
description="Regexes to extract the answer from generated response", description="Regexes to extract the answer from generated response",
default_factory=list, default_factory=list,
) )
aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
description="Aggregation functions to apply to the scores of each row", @json_schema_type
default_factory=list, class RegexParserScoringFnParams(BaseModel):
) type: Literal["regex_parser"] = "regex_parser"
regex_parser: RegexParserScoringFnParamsFields
@json_schema_type
class RegexParserMathScoringFnParams(BaseModel):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
regex_parser_math_response: RegexParserScoringFnParamsFields
@json_schema_type
class EqualityScoringFnParams(BaseModel):
type: Literal["equality"] = "equality"
equality: BasicScoringFnParamsFields
@json_schema_type
class SubsetOfcoringFnParams(BaseModel):
type: Literal["subset_of"] = "subset_of"
subset_of: BasicScoringFnParamsFields
@json_schema_type
class FactualityScoringFnParams(BaseModel):
type: Literal["factuality"] = "factuality"
factuality: BasicScoringFnParamsFields
@json_schema_type
class FaithfulnessScoringFnParams(BaseModel):
type: Literal["faithfulness"] = "faithfulness"
faithfulness: BasicScoringFnParamsFields
@json_schema_type
class AnswerCorrectnessScoringFnParams(BaseModel):
type: Literal["answer_correctness"] = "answer_correctness"
answer_correctness: BasicScoringFnParamsFields
@json_schema_type
class AnswerRelevancyScoringFnParams(BaseModel):
type: Literal["answer_relevancy"] = "answer_relevancy"
answer_relevancy: BasicScoringFnParamsFields
@json_schema_type
class AnswerSimilarityScoringFnParams(BaseModel):
type: Literal["answer_similarity"] = "answer_similarity"
answer_similarity: BasicScoringFnParamsFields
@json_schema_type
class ContextEntityRecallScoringFnParams(BaseModel):
type: Literal["context_entity_recall"] = "context_entity_recall"
context_entity_recall: BasicScoringFnParamsFields
@json_schema_type
class ContextPrecisionScoringFnParams(BaseModel):
type: Literal["context_precision"] = "context_precision"
context_precision: BasicScoringFnParamsFields
@json_schema_type
class ContextRecallScoringFnParams(BaseModel):
type: Literal["context_recall"] = "context_recall"
context_recall: BasicScoringFnParamsFields
@json_schema_type
class ContextRelevancyScoringFnParams(BaseModel):
type: Literal["context_relevancy"] = "context_relevancy"
context_relevancy: BasicScoringFnParamsFields
@json_schema_type
class CustomLLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields
ScoringFnParams = register_schema( ScoringFnParams = register_schema(
Annotated[ Annotated[
Union[ Union[
LLMAsJudgeScoringFnParams, CustomLLMAsJudgeScoringFnParams,
RegexParserScoringFnParams, RegexParserScoringFnParams,
RegexParserMathScoringFnParams, RegexParserMathScoringFnParams,
EqualityScoringFnParams, EqualityScoringFnParams,