diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 8142b1c7b..7c28fd8e5 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -6388,90 +6388,114 @@
"AnswerCorrectnessScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "answer_correctness",
"default": "answer_correctness"
+ },
+ "answer_correctness": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "answer_correctness"
],
"title": "AnswerCorrectnessScoringFnParams"
},
"AnswerRelevancyScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "answer_relevancy",
"default": "answer_relevancy"
+ },
+ "answer_relevancy": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "answer_relevancy"
],
"title": "AnswerRelevancyScoringFnParams"
},
"AnswerSimilarityScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "answer_similarity",
"default": "answer_similarity"
+ },
+ "answer_similarity": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "answer_similarity"
],
"title": "AnswerSimilarityScoringFnParams"
},
@@ -6505,150 +6529,234 @@
"ContextEntityRecallScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "context_entity_recall",
"default": "context_entity_recall"
+ },
+ "context_entity_recall": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "context_entity_recall"
],
"title": "ContextEntityRecallScoringFnParams"
},
"ContextPrecisionScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "context_precision",
"default": "context_precision"
+ },
+ "context_precision": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "context_precision"
],
"title": "ContextPrecisionScoringFnParams"
},
"ContextRecallScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "context_recall",
"default": "context_recall"
+ },
+ "context_recall": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "context_recall"
],
"title": "ContextRecallScoringFnParams"
},
"ContextRelevancyScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "context_relevancy",
"default": "context_relevancy"
+ },
+ "context_relevancy": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "context_relevancy"
],
"title": "ContextRelevancyScoringFnParams"
},
+ "CustomLLMAsJudgeScoringFnParams": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "custom_llm_as_judge",
+ "default": "custom_llm_as_judge"
+ },
+ "custom_llm_as_judge": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "custom_llm_as_judge",
+ "default": "custom_llm_as_judge"
+ },
+ "judge_model": {
+ "type": "string"
+ },
+ "prompt_template": {
+ "type": "string"
+ },
+ "judge_score_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "judge_model"
+ ],
+ "title": "CustomLLMAsJudgeScoringFnParamsFields"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "custom_llm_as_judge"
+ ],
+ "title": "CustomLLMAsJudgeScoringFnParams"
+ },
"EqualityScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "equality",
"default": "equality"
+ },
+ "equality": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "equality"
],
"title": "EqualityScoringFnParams"
},
@@ -6672,104 +6780,78 @@
"FactualityScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "factuality",
"default": "factuality"
+ },
+ "factuality": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "factuality"
],
"title": "FactualityScoringFnParams"
},
"FaithfulnessScoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "faithfulness",
"default": "faithfulness"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "FaithfulnessScoringFnParams"
- },
- "LLMAsJudgeScoringFnParams": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "custom_llm_as_judge",
- "default": "custom_llm_as_judge"
},
- "judge_model": {
- "type": "string"
- },
- "prompt_template": {
- "type": "string"
- },
- "judge_score_regexes": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- }
+ "faithfulness": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
"type",
- "judge_model"
+ "faithfulness"
],
- "title": "LLMAsJudgeScoringFnParams"
+ "title": "FaithfulnessScoringFnParams"
},
"ModelCandidate": {
"type": "object",
@@ -6804,83 +6886,103 @@
"RegexParserMathScoringFnParams": {
"type": "object",
"properties": {
- "parsing_regexes": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "(Optional) Regexes to extract the answer from generated response."
- },
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "regex_parser_math_response",
"default": "regex_parser_math_response"
+ },
+ "regex_parser_math_response": {
+ "type": "object",
+ "properties": {
+ "parsing_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "(Optional) Regexes to extract the answer from generated response."
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "parsing_regexes"
+ ],
+ "title": "RegexParserScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "parsing_regexes",
- "type"
+ "type",
+ "regex_parser_math_response"
],
"title": "RegexParserMathScoringFnParams"
},
"RegexParserScoringFnParams": {
"type": "object",
"properties": {
- "parsing_regexes": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "(Optional) Regexes to extract the answer from generated response."
- },
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "regex_parser",
"default": "regex_parser"
+ },
+ "regex_parser": {
+ "type": "object",
+ "properties": {
+ "parsing_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "(Optional) Regexes to extract the answer from generated response."
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "parsing_regexes"
+ ],
+ "title": "RegexParserScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "parsing_regexes",
- "type"
+ "type",
+ "regex_parser"
],
"title": "RegexParserScoringFnParams"
},
"ScoringFnParams": {
"oneOf": [
{
- "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+ "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
},
{
"$ref": "#/components/schemas/RegexParserScoringFnParams"
@@ -6925,7 +7027,7 @@
"discriminator": {
"propertyName": "type",
"mapping": {
- "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
+ "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
"regex_parser": "#/components/schemas/RegexParserScoringFnParams",
"regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
"equality": "#/components/schemas/EqualityScoringFnParams",
@@ -6945,30 +7047,38 @@
"SubsetOfcoringFnParams": {
"type": "object",
"properties": {
- "aggregation_functions": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "average",
- "median",
- "categorical_count",
- "accuracy"
- ],
- "title": "AggregationFunctionType",
- "description": "A type of aggregation function."
- },
- "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
- },
"type": {
"type": "string",
"const": "subset_of",
"default": "subset_of"
+ },
+ "subset_of": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ },
+ "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+ }
+ },
+ "additionalProperties": false,
+ "title": "BasicScoringFnParamsFields"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "subset_of"
],
"title": "SubsetOfcoringFnParams"
},
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 80516221d..a21860766 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -4451,77 +4451,95 @@ components:
AnswerCorrectnessScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: answer_correctness
default: answer_correctness
+ answer_correctness:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - answer_correctness
title: AnswerCorrectnessScoringFnParams
AnswerRelevancyScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: answer_relevancy
default: answer_relevancy
+ answer_relevancy:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - answer_relevancy
title: AnswerRelevancyScoringFnParams
AnswerSimilarityScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: answer_similarity
default: answer_similarity
+ answer_similarity:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - answer_similarity
title: AnswerSimilarityScoringFnParams
BenchmarkConfig:
type: object
@@ -4551,127 +4569,189 @@ components:
ContextEntityRecallScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: context_entity_recall
default: context_entity_recall
+ context_entity_recall:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - context_entity_recall
title: ContextEntityRecallScoringFnParams
ContextPrecisionScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: context_precision
default: context_precision
+ context_precision:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - context_precision
title: ContextPrecisionScoringFnParams
ContextRecallScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: context_recall
default: context_recall
+ context_recall:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - context_recall
title: ContextRecallScoringFnParams
ContextRelevancyScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: context_relevancy
default: context_relevancy
+ context_relevancy:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - context_relevancy
title: ContextRelevancyScoringFnParams
+ CustomLLMAsJudgeScoringFnParams:
+ type: object
+ properties:
+ type:
+ type: string
+ const: custom_llm_as_judge
+ default: custom_llm_as_judge
+ custom_llm_as_judge:
+ type: object
+ properties:
+ type:
+ type: string
+ const: custom_llm_as_judge
+ default: custom_llm_as_judge
+ judge_model:
+ type: string
+ prompt_template:
+ type: string
+ judge_score_regexes:
+ type: array
+ items:
+ type: string
+ additionalProperties: false
+ required:
+ - type
+ - judge_model
+ title: CustomLLMAsJudgeScoringFnParamsFields
+ additionalProperties: false
+ required:
+ - type
+ - custom_llm_as_judge
+ title: CustomLLMAsJudgeScoringFnParams
EqualityScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: equality
default: equality
+ equality:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - equality
title: EqualityScoringFnParams
EvalCandidate:
oneOf:
@@ -4685,84 +4765,65 @@ components:
FactualityScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: factuality
default: factuality
+ factuality:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - factuality
title: FactualityScoringFnParams
FaithfulnessScoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: faithfulness
default: faithfulness
+ faithfulness:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - faithfulness
title: FaithfulnessScoringFnParams
- LLMAsJudgeScoringFnParams:
- type: object
- properties:
- type:
- type: string
- const: custom_llm_as_judge
- default: custom_llm_as_judge
- judge_model:
- type: string
- prompt_template:
- type: string
- judge_score_regexes:
- type: array
- items:
- type: string
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- additionalProperties: false
- required:
- - type
- - judge_model
- title: LLMAsJudgeScoringFnParams
ModelCandidate:
type: object
properties:
@@ -4791,70 +4852,84 @@ components:
RegexParserMathScoringFnParams:
type: object
properties:
- parsing_regexes:
- type: array
- items:
- type: string
- description: >-
- (Optional) Regexes to extract the answer from generated response.
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: regex_parser_math_response
default: regex_parser_math_response
+ regex_parser_math_response:
+ type: object
+ properties:
+ parsing_regexes:
+ type: array
+ items:
+ type: string
+ description: >-
+ (Optional) Regexes to extract the answer from generated response.
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ required:
+ - parsing_regexes
+ title: RegexParserScoringFnParamsFields
additionalProperties: false
required:
- - parsing_regexes
- type
+ - regex_parser_math_response
title: RegexParserMathScoringFnParams
RegexParserScoringFnParams:
type: object
properties:
- parsing_regexes:
- type: array
- items:
- type: string
- description: >-
- (Optional) Regexes to extract the answer from generated response.
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: regex_parser
default: regex_parser
+ regex_parser:
+ type: object
+ properties:
+ parsing_regexes:
+ type: array
+ items:
+ type: string
+ description: >-
+ (Optional) Regexes to extract the answer from generated response.
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ required:
+ - parsing_regexes
+ title: RegexParserScoringFnParamsFields
additionalProperties: false
required:
- - parsing_regexes
- type
+ - regex_parser
title: RegexParserScoringFnParams
ScoringFnParams:
oneOf:
- - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
- $ref: '#/components/schemas/RegexParserScoringFnParams'
- $ref: '#/components/schemas/RegexParserMathScoringFnParams'
- $ref: '#/components/schemas/EqualityScoringFnParams'
@@ -4871,7 +4946,7 @@ components:
discriminator:
propertyName: type
mapping:
- custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
regex_parser: '#/components/schemas/RegexParserScoringFnParams'
regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
equality: '#/components/schemas/EqualityScoringFnParams'
@@ -4888,27 +4963,33 @@ components:
SubsetOfcoringFnParams:
type: object
properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- description: >-
- (Optional) Aggregation functions to apply to the scores of each row. If
- not provided, no aggregation will be performed.
type:
type: string
const: subset_of
default: subset_of
+ subset_of:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ description: >-
+ (Optional) Aggregation functions to apply to the scores of each row.
+ If not provided, no aggregation will be performed.
+ additionalProperties: false
+ title: BasicScoringFnParamsFields
additionalProperties: false
required:
- type
+ - subset_of
title: SubsetOfcoringFnParams
EvaluateRowsRequest:
type: object
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index 35c0dc9d1..6546e71f9 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
accuracy = "accuracy"
-class BasicScoringFnParamsCommon(BaseModel):
+class BasicScoringFnParamsFields(BaseModel):
"""
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
"""
@@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
)
-class RegexParserScoringFnParamsCommon(BaseModel):
+class RegexParserScoringFnParamsFields(BaseModel):
"""
:param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
:param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
default_factory=list,
)
-
-@json_schema_type
-class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
- type: Literal["regex_parser"] = "regex_parser"
-
-
-@json_schema_type
-class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
- type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
-
-
-@json_schema_type
-class EqualityScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["equality"] = "equality"
-
-
-@json_schema_type
-class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["subset_of"] = "subset_of"
-
-
-@json_schema_type
-class FactualityScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["factuality"] = "factuality"
-
-
-@json_schema_type
-class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["faithfulness"] = "faithfulness"
-
-
-@json_schema_type
-class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["answer_correctness"] = "answer_correctness"
-
-
-@json_schema_type
-class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["answer_relevancy"] = "answer_relevancy"
-
-
-@json_schema_type
-class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["answer_similarity"] = "answer_similarity"
-
-
-@json_schema_type
-class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["context_entity_recall"] = "context_entity_recall"
-
-
-@json_schema_type
-class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["context_precision"] = "context_precision"
-
-
-@json_schema_type
-class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["context_recall"] = "context_recall"
-
-
-@json_schema_type
-class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
- type: Literal["context_relevancy"] = "context_relevancy"
-
-
-@json_schema_type
-class LLMAsJudgeScoringFnParams(BaseModel):
+class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
judge_model: str
prompt_template: Optional[str] = None
@@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
description="Regexes to extract the answer from generated response",
default_factory=list,
)
- aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
- description="Aggregation functions to apply to the scores of each row",
- default_factory=list,
- )
+
+@json_schema_type
+class RegexParserScoringFnParams(BaseModel):
+ type: Literal["regex_parser"] = "regex_parser"
+ regex_parser: RegexParserScoringFnParamsFields
+
+
+@json_schema_type
+class RegexParserMathScoringFnParams(BaseModel):
+ type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
+ regex_parser_math_response: RegexParserScoringFnParamsFields
+
+@json_schema_type
+class EqualityScoringFnParams(BaseModel):
+ type: Literal["equality"] = "equality"
+ equality: BasicScoringFnParamsFields
+
+@json_schema_type
+class SubsetOfcoringFnParams(BaseModel):
+ type: Literal["subset_of"] = "subset_of"
+ subset_of: BasicScoringFnParamsFields
+
+@json_schema_type
+class FactualityScoringFnParams(BaseModel):
+ type: Literal["factuality"] = "factuality"
+ factuality: BasicScoringFnParamsFields
+
+@json_schema_type
+class FaithfulnessScoringFnParams(BaseModel):
+ type: Literal["faithfulness"] = "faithfulness"
+ faithfulness: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerCorrectnessScoringFnParams(BaseModel):
+ type: Literal["answer_correctness"] = "answer_correctness"
+ answer_correctness: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerRelevancyScoringFnParams(BaseModel):
+ type: Literal["answer_relevancy"] = "answer_relevancy"
+ answer_relevancy: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerSimilarityScoringFnParams(BaseModel):
+ type: Literal["answer_similarity"] = "answer_similarity"
+ answer_similarity: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextEntityRecallScoringFnParams(BaseModel):
+ type: Literal["context_entity_recall"] = "context_entity_recall"
+ context_entity_recall: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextPrecisionScoringFnParams(BaseModel):
+ type: Literal["context_precision"] = "context_precision"
+ context_precision: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextRecallScoringFnParams(BaseModel):
+ type: Literal["context_recall"] = "context_recall"
+ context_recall: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextRelevancyScoringFnParams(BaseModel):
+ type: Literal["context_relevancy"] = "context_relevancy"
+ context_relevancy: BasicScoringFnParamsFields
+
+
+@json_schema_type
+class CustomLLMAsJudgeScoringFnParams(BaseModel):
+ type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
+ custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields
ScoringFnParams = register_schema(
Annotated[
Union[
- LLMAsJudgeScoringFnParams,
+ CustomLLMAsJudgeScoringFnParams,
RegexParserScoringFnParams,
RegexParserMathScoringFnParams,
EqualityScoringFnParams,