single type

This commit is contained in:
Xi Yan 2025-03-11 23:20:16 -07:00
parent bc71980769
commit bec5a46915
3 changed files with 639 additions and 166 deletions

View file

@ -6351,28 +6351,71 @@
"title": "AggregationFunctionType", "title": "AggregationFunctionType",
"description": "A type of aggregation function." "description": "A type of aggregation function."
}, },
"BasicScoringFnParams": { "AnswerCorrectnessScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": {
"type": "string",
"const": "basic",
"default": "basic"
},
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "$ref": "#/components/schemas/AggregationFunctionType"
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "answer_correctness",
"default": "answer_correctness"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "BasicScoringFnParams", "title": "AnswerCorrectnessScoringFnParams"
"description": "Parameters for a non-parameterized scoring function." },
"AnswerRelevancyScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "answer_relevancy",
"default": "answer_relevancy"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "AnswerRelevancyScoringFnParams"
},
"AnswerSimilarityScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "answer_similarity",
"default": "answer_similarity"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "AnswerSimilarityScoringFnParams"
}, },
"BenchmarkConfig": { "BenchmarkConfig": {
"type": "object", "type": "object",
@ -6401,6 +6444,116 @@
"title": "BenchmarkConfig", "title": "BenchmarkConfig",
"description": "A benchmark configuration for evaluation." "description": "A benchmark configuration for evaluation."
}, },
"ContextEntityRecallScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "context_entity_recall",
"default": "context_entity_recall"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ContextEntityRecallScoringFnParams"
},
"ContextPrecisionScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "context_precision",
"default": "context_precision"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ContextPrecisionScoringFnParams"
},
"ContextRecallScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "context_recall",
"default": "context_recall"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ContextRecallScoringFnParams"
},
"ContextRelevancyScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "context_relevancy",
"default": "context_relevancy"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ContextRelevancyScoringFnParams"
},
"EqualityScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "equality",
"default": "equality"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "EqualityScoringFnParams"
},
"EvalCandidate": { "EvalCandidate": {
"oneOf": [ "oneOf": [
{ {
@ -6418,6 +6571,50 @@
} }
} }
}, },
"FactualityScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "factuality",
"default": "factuality"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "FactualityScoringFnParams"
},
"FaithfulnessScoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "faithfulness",
"default": "faithfulness"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "FaithfulnessScoringFnParams"
},
"LLMAsJudgeScoringFnParams": { "LLMAsJudgeScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -6427,26 +6624,22 @@
"default": "custom_llm_as_judge" "default": "custom_llm_as_judge"
}, },
"judge_model": { "judge_model": {
"type": "string", "type": "string"
"description": "The model to use for scoring."
}, },
"prompt_template": { "prompt_template": {
"type": "string", "type": "string"
"description": "(Optional) The prompt template to use for scoring."
}, },
"judge_score_regexes": { "judge_score_regexes": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
}, }
"description": "(Optional) Regexes to extract the score from the judge model's response."
}, },
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "$ref": "#/components/schemas/AggregationFunctionType"
}, }
"description": "(Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided."
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -6454,8 +6647,7 @@
"type", "type",
"judge_model" "judge_model"
], ],
"title": "LLMAsJudgeScoringFnParams", "title": "LLMAsJudgeScoringFnParams"
"description": "Parameters for a scoring function that uses a judge model to score the answer."
}, },
"ModelCandidate": { "ModelCandidate": {
"type": "object", "type": "object",
@ -6487,35 +6679,65 @@
"title": "ModelCandidate", "title": "ModelCandidate",
"description": "A model candidate for evaluation." "description": "A model candidate for evaluation."
}, },
"RegexParserScoringFnParams": { "RegexParserMathScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"type": {
"type": "string",
"const": "regex_parser",
"default": "regex_parser"
},
"parsing_regexes": { "parsing_regexes": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
}, },
"description": "Regexes to extract the answer from generated response" "description": "(Optional) Regexes to extract the answer from generated response."
}, },
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "$ref": "#/components/schemas/AggregationFunctionType"
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "regex_parser_math_response",
"default": "regex_parser_math_response"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"parsing_regexes",
"type" "type"
], ],
"title": "RegexParserScoringFnParams", "title": "RegexParserMathScoringFnParams"
"description": "Parameters for a scoring function that parses the answer from the generated response using regexes, and checks against the expected answer." },
"RegexParserScoringFnParams": {
"type": "object",
"properties": {
"parsing_regexes": {
"type": "array",
"items": {
"type": "string"
},
"description": "(Optional) Regexes to extract the answer from generated response."
},
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "regex_parser",
"default": "regex_parser"
}
},
"additionalProperties": false,
"required": [
"parsing_regexes",
"type"
],
"title": "RegexParserScoringFnParams"
}, },
"ScoringFnParams": { "ScoringFnParams": {
"oneOf": [ "oneOf": [
@ -6526,7 +6748,40 @@
"$ref": "#/components/schemas/RegexParserScoringFnParams" "$ref": "#/components/schemas/RegexParserScoringFnParams"
}, },
{ {
"$ref": "#/components/schemas/BasicScoringFnParams" "$ref": "#/components/schemas/RegexParserMathScoringFnParams"
},
{
"$ref": "#/components/schemas/EqualityScoringFnParams"
},
{
"$ref": "#/components/schemas/SubsetOfcoringFnParams"
},
{
"$ref": "#/components/schemas/FactualityScoringFnParams"
},
{
"$ref": "#/components/schemas/FaithfulnessScoringFnParams"
},
{
"$ref": "#/components/schemas/AnswerCorrectnessScoringFnParams"
},
{
"$ref": "#/components/schemas/AnswerRelevancyScoringFnParams"
},
{
"$ref": "#/components/schemas/AnswerSimilarityScoringFnParams"
},
{
"$ref": "#/components/schemas/ContextEntityRecallScoringFnParams"
},
{
"$ref": "#/components/schemas/ContextPrecisionScoringFnParams"
},
{
"$ref": "#/components/schemas/ContextRecallScoringFnParams"
},
{
"$ref": "#/components/schemas/ContextRelevancyScoringFnParams"
} }
], ],
"discriminator": { "discriminator": {
@ -6534,10 +6789,43 @@
"mapping": { "mapping": {
"custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
"regex_parser": "#/components/schemas/RegexParserScoringFnParams", "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
"basic": "#/components/schemas/BasicScoringFnParams" "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
"equality": "#/components/schemas/EqualityScoringFnParams",
"subset_of": "#/components/schemas/SubsetOfcoringFnParams",
"factuality": "#/components/schemas/FactualityScoringFnParams",
"faithfulness": "#/components/schemas/FaithfulnessScoringFnParams",
"answer_correctness": "#/components/schemas/AnswerCorrectnessScoringFnParams",
"answer_relevancy": "#/components/schemas/AnswerRelevancyScoringFnParams",
"answer_similarity": "#/components/schemas/AnswerSimilarityScoringFnParams",
"context_entity_recall": "#/components/schemas/ContextEntityRecallScoringFnParams",
"context_precision": "#/components/schemas/ContextPrecisionScoringFnParams",
"context_recall": "#/components/schemas/ContextRecallScoringFnParams",
"context_relevancy": "#/components/schemas/ContextRelevancyScoringFnParams"
} }
} }
}, },
"SubsetOfcoringFnParams": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AggregationFunctionType"
},
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
},
"type": {
"type": "string",
"const": "subset_of",
"default": "subset_of"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "SubsetOfcoringFnParams"
},
"EvaluateRowsRequest": { "EvaluateRowsRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9371,7 +9659,8 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"scoring_fn_type" "scoring_fn_type",
"params"
], ],
"title": "RegisterScoringFunctionRequest" "title": "RegisterScoringFunctionRequest"
}, },

View file

@ -4420,26 +4420,60 @@ components:
- accuracy - accuracy
title: AggregationFunctionType title: AggregationFunctionType
description: A type of aggregation function. description: A type of aggregation function.
BasicScoringFnParams: AnswerCorrectnessScoringFnParams:
type: object type: object
properties: properties:
type:
type: string
const: basic
default: basic
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' $ref: '#/components/schemas/AggregationFunctionType'
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. No (Optional) Aggregation functions to apply to the scores of each row. If
aggregation for results is calculated if not provided. not provided, no aggregation will be performed.
type:
type: string
const: answer_correctness
default: answer_correctness
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: BasicScoringFnParams title: AnswerCorrectnessScoringFnParams
description: >- AnswerRelevancyScoringFnParams:
Parameters for a non-parameterized scoring function. type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: answer_relevancy
default: answer_relevancy
additionalProperties: false
required:
- type
title: AnswerRelevancyScoringFnParams
AnswerSimilarityScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: answer_similarity
default: answer_similarity
additionalProperties: false
required:
- type
title: AnswerSimilarityScoringFnParams
BenchmarkConfig: BenchmarkConfig:
type: object type: object
properties: properties:
@ -4465,6 +4499,96 @@ components:
title: BenchmarkConfig title: BenchmarkConfig
description: >- description: >-
A benchmark configuration for evaluation. A benchmark configuration for evaluation.
ContextEntityRecallScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: context_entity_recall
default: context_entity_recall
additionalProperties: false
required:
- type
title: ContextEntityRecallScoringFnParams
ContextPrecisionScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: context_precision
default: context_precision
additionalProperties: false
required:
- type
title: ContextPrecisionScoringFnParams
ContextRecallScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: context_recall
default: context_recall
additionalProperties: false
required:
- type
title: ContextRecallScoringFnParams
ContextRelevancyScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: context_relevancy
default: context_relevancy
additionalProperties: false
required:
- type
title: ContextRelevancyScoringFnParams
EqualityScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: equality
default: equality
additionalProperties: false
required:
- type
title: EqualityScoringFnParams
EvalCandidate: EvalCandidate:
oneOf: oneOf:
- $ref: '#/components/schemas/ModelCandidate' - $ref: '#/components/schemas/ModelCandidate'
@ -4474,6 +4598,42 @@ components:
mapping: mapping:
model: '#/components/schemas/ModelCandidate' model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate' agent: '#/components/schemas/AgentCandidate'
FactualityScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: factuality
default: factuality
additionalProperties: false
required:
- type
title: FactualityScoringFnParams
FaithfulnessScoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: faithfulness
default: faithfulness
additionalProperties: false
required:
- type
title: FaithfulnessScoringFnParams
LLMAsJudgeScoringFnParams: LLMAsJudgeScoringFnParams:
type: object type: object
properties: properties:
@ -4483,31 +4643,21 @@ components:
default: custom_llm_as_judge default: custom_llm_as_judge
judge_model: judge_model:
type: string type: string
description: The model to use for scoring.
prompt_template: prompt_template:
type: string type: string
description: >-
(Optional) The prompt template to use for scoring.
judge_score_regexes: judge_score_regexes:
type: array type: array
items: items:
type: string type: string
description: >-
(Optional) Regexes to extract the score from the judge model's response.
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' $ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. No
aggregation for results is calculated if not provided.
additionalProperties: false additionalProperties: false
required: required:
- type - type
- judge_model - judge_model
title: LLMAsJudgeScoringFnParams title: LLMAsJudgeScoringFnParams
description: >-
Parameters for a scoring function that uses a judge model to score the answer.
ModelCandidate: ModelCandidate:
type: object type: object
properties: properties:
@ -4533,44 +4683,107 @@ components:
- sampling_params - sampling_params
title: ModelCandidate title: ModelCandidate
description: A model candidate for evaluation. description: A model candidate for evaluation.
RegexParserScoringFnParams: RegexParserMathScoringFnParams:
type: object type: object
properties: properties:
type:
type: string
const: regex_parser
default: regex_parser
parsing_regexes: parsing_regexes:
type: array type: array
items: items:
type: string type: string
description: >- description: >-
Regexes to extract the answer from generated response (Optional) Regexes to extract the answer from generated response.
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' $ref: '#/components/schemas/AggregationFunctionType'
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. No (Optional) Aggregation functions to apply to the scores of each row. If
aggregation for results is calculated if not provided. not provided, no aggregation will be performed.
type:
type: string
const: regex_parser_math_response
default: regex_parser_math_response
additionalProperties: false additionalProperties: false
required: required:
- parsing_regexes
- type
title: RegexParserMathScoringFnParams
RegexParserScoringFnParams:
type: object
properties:
parsing_regexes:
type: array
items:
type: string
description: >-
(Optional) Regexes to extract the answer from generated response.
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: regex_parser
default: regex_parser
additionalProperties: false
required:
- parsing_regexes
- type - type
title: RegexParserScoringFnParams title: RegexParserScoringFnParams
description: >-
Parameters for a scoring function that parses the answer from the generated
response using regexes, and checks against the expected answer.
ScoringFnParams: ScoringFnParams:
oneOf: oneOf:
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
- $ref: '#/components/schemas/RegexParserScoringFnParams' - $ref: '#/components/schemas/RegexParserScoringFnParams'
- $ref: '#/components/schemas/BasicScoringFnParams' - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
- $ref: '#/components/schemas/EqualityScoringFnParams'
- $ref: '#/components/schemas/SubsetOfcoringFnParams'
- $ref: '#/components/schemas/FactualityScoringFnParams'
- $ref: '#/components/schemas/FaithfulnessScoringFnParams'
- $ref: '#/components/schemas/AnswerCorrectnessScoringFnParams'
- $ref: '#/components/schemas/AnswerRelevancyScoringFnParams'
- $ref: '#/components/schemas/AnswerSimilarityScoringFnParams'
- $ref: '#/components/schemas/ContextEntityRecallScoringFnParams'
- $ref: '#/components/schemas/ContextPrecisionScoringFnParams'
- $ref: '#/components/schemas/ContextRecallScoringFnParams'
- $ref: '#/components/schemas/ContextRelevancyScoringFnParams'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams'
basic: '#/components/schemas/BasicScoringFnParams' regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
equality: '#/components/schemas/EqualityScoringFnParams'
subset_of: '#/components/schemas/SubsetOfcoringFnParams'
factuality: '#/components/schemas/FactualityScoringFnParams'
faithfulness: '#/components/schemas/FaithfulnessScoringFnParams'
answer_correctness: '#/components/schemas/AnswerCorrectnessScoringFnParams'
answer_relevancy: '#/components/schemas/AnswerRelevancyScoringFnParams'
answer_similarity: '#/components/schemas/AnswerSimilarityScoringFnParams'
context_entity_recall: '#/components/schemas/ContextEntityRecallScoringFnParams'
context_precision: '#/components/schemas/ContextPrecisionScoringFnParams'
context_recall: '#/components/schemas/ContextRecallScoringFnParams'
context_relevancy: '#/components/schemas/ContextRelevancyScoringFnParams'
SubsetOfcoringFnParams:
type: object
properties:
aggregation_functions:
type: array
items:
$ref: '#/components/schemas/AggregationFunctionType'
description: >-
(Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed.
type:
type: string
const: subset_of
default: subset_of
additionalProperties: false
required:
- type
title: SubsetOfcoringFnParams
EvaluateRowsRequest: EvaluateRowsRequest:
type: object type: object
properties: properties:
@ -6364,6 +6577,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- scoring_fn_type - scoring_fn_type
- params
title: RegisterScoringFunctionRequest title: RegisterScoringFunctionRequest
RegisterShieldRequest: RegisterShieldRequest:
type: object type: object

View file

@ -68,110 +68,27 @@ class AggregationFunctionType(Enum):
accuracy = "accuracy" accuracy = "accuracy"
# TODO(xiyan): class BasicScoringFnParamsCommon(BaseModel):
# ============= OPTION 1: SEPARATE ScoringFnParamsType + ScoringFunctionType ============= """
# class ScoringFnParamsType(Enum): :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
# """ """
# A type of scoring function parameters.
# :cvar llm_as_judge: Provide judge model and prompt template.
# :cvar regex_parser: Provide regexes to parse the answer from the generated response.
# :cvar basic: Parameters for basic non-parameterized scoring function.
# """
# custom_llm_as_judge = "custom_llm_as_judge"
# regex_parser = "regex_parser"
# basic = "basic"
# @json_schema_type
# class LLMAsJudgeScoringFnParams(BaseModel):
# """
# Parameters for a scoring function that uses a judge model to score the answer.
# :param judge_model: The model to use for scoring.
# :param prompt_template: (Optional) The prompt template to use for scoring.
# :param judge_score_regexes: (Optional) Regexes to extract the score from the judge model's response.
# :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided.
# """
# type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
# judge_model: str
# prompt_template: Optional[str] = None
# judge_score_regexes: Optional[List[str]] = Field(
# description="Regexes to extract the answer from generated response",
# default_factory=list,
# )
# aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
# description="Aggregation functions to apply to the scores of each row",
# default_factory=list,
# )
# @json_schema_type
# class RegexParserScoringFnParams(BaseModel):
# """
# Parameters for a scoring function that parses the answer from the generated response using regexes, and checks against the expected answer.
# :param parsing_regexes: Regexes to extract the answer from generated response
# :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided.
# """
# type: Literal["regex_parser"] = "regex_parser"
# parsing_regexes: Optional[List[str]] = Field(
# description="Regexes to extract the answer from generated response",
# default_factory=list,
# )
# aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
# description="Aggregation functions to apply to the scores of each row",
# default_factory=list,
# )
# @json_schema_type
# class BasicScoringFnParams(BaseModel):
# """
# Parameters for a non-parameterized scoring function.
# :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. No aggregation for results is calculated if not provided.
# """
# type: Literal["basic"] = "basic"
# aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
# description="Aggregation functions to apply to the scores of each row",
# default_factory=list,
# )
# ScoringFnParams = register_schema(
# Annotated[
# Union[
# LLMAsJudgeScoringFnParams,
# RegexParserScoringFnParams,
# BasicScoringFnParams,
# ],
# Field(discriminator="type"),
# ],
# name="ScoringFnParams",
# )
# ============= END OF OPTION 1 =============
# TODO(xiyan):
# ============= OPTION 2: MERGE ScoringFnParamsType + ScoringFunctionType into ScoringFunctionType =============
class RegexParserScoringFnParamsCommon(BaseModel):
parsing_regexes: Optional[List[str]] = Field(
description="Regexes to extract the answer from generated response",
default_factory=list,
)
aggregation_functions: Optional[List[AggregationFunctionType]] = Field( aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
description="Aggregation functions to apply to the scores of each row", description="Aggregation functions to apply to the scores of each row",
default_factory=list, default_factory=list,
) )
class BasicScoringFnParamsCommon(BaseModel): class RegexParserScoringFnParamsCommon(BaseModel):
"""
:param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
"""
parsing_regexes: List[str] = Field(
description="Regexes to extract the answer from generated response",
default_factory=list,
)
aggregation_functions: Optional[List[AggregationFunctionType]] = Field( aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
description="Aggregation functions to apply to the scores of each row", description="Aggregation functions to apply to the scores of each row",
default_factory=list, default_factory=list,
@ -198,6 +115,51 @@ class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
type: Literal["subset_of"] = "subset_of" type: Literal["subset_of"] = "subset_of"
@json_schema_type
class FactualityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["factuality"] = "factuality"
@json_schema_type
class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["faithfulness"] = "faithfulness"
@json_schema_type
class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_correctness"] = "answer_correctness"
@json_schema_type
class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_relevancy"] = "answer_relevancy"
@json_schema_type
class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["answer_similarity"] = "answer_similarity"
@json_schema_type
class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_entity_recall"] = "context_entity_recall"
@json_schema_type
class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_precision"] = "context_precision"
@json_schema_type
class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_recall"] = "context_recall"
@json_schema_type
class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
type: Literal["context_relevancy"] = "context_relevancy"
@json_schema_type @json_schema_type
class LLMAsJudgeScoringFnParams(BaseModel): class LLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
@ -221,6 +183,15 @@ ScoringFnParams = register_schema(
RegexParserMathScoringFnParams, RegexParserMathScoringFnParams,
EqualityScoringFnParams, EqualityScoringFnParams,
SubsetOfcoringFnParams, SubsetOfcoringFnParams,
FactualityScoringFnParams,
FaithfulnessScoringFnParams,
AnswerCorrectnessScoringFnParams,
AnswerRelevancyScoringFnParams,
AnswerSimilarityScoringFnParams,
ContextEntityRecallScoringFnParams,
ContextPrecisionScoringFnParams,
ContextRecallScoringFnParams,
ContextRelevancyScoringFnParams,
], ],
Field(discriminator="type"), Field(discriminator="type"),
], ],
@ -284,9 +255,8 @@ class ScoringFunctions(Protocol):
@webmethod(route="/scoring-functions", method="POST") @webmethod(route="/scoring-functions", method="POST")
async def register_scoring_function( async def register_scoring_function(
self, self,
# TODO(xiyan): scoring_fn_type will not be needed for OPTION 2 scoring_fn_type: ScoringFunctionType,
# scoring_fn_type: ScoringFunctionType, params: ScoringFnParams = None,
params: Optional[ScoringFnParams] = None,
scoring_fn_id: Optional[str] = None, scoring_fn_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None,
) -> ScoringFn: ) -> ScoringFn:
@ -294,7 +264,7 @@ class ScoringFunctions(Protocol):
Register a new scoring function with given parameters. Register a new scoring function with given parameters.
Only valid scoring function types that can be parameterized can be registered. Only valid scoring function types that can be parameterized can be registered.
# :param scoring_fn_type: The type of scoring function to register. :param scoring_fn_type: The type of scoring function to register.
:param params: The parameters for the scoring function. :param params: The parameters for the scoring function.
:param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated. :param scoring_fn_id: (Optional) The ID of the scoring function to register. If not provided, a random ID will be generated.
:param metadata: (Optional) Any additional metadata to be associated with the scoring function. :param metadata: (Optional) Any additional metadata to be associated with the scoring function.