From a7abe6df740fc24d1edd461797b4fac83d679120 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 12 Mar 2025 21:31:22 -0700 Subject: [PATCH] better params fields --- docs/_static/llama-stack-spec.html | 644 ++++++++++-------- docs/_static/llama-stack-spec.yaml | 539 ++++++++------- .../scoring_functions/scoring_functions.py | 151 ++-- 3 files changed, 763 insertions(+), 571 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 8142b1c7b..7c28fd8e5 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -6388,90 +6388,114 @@ "AnswerCorrectnessScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "answer_correctness", "default": "answer_correctness" + }, + "answer_correctness": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "answer_correctness" ], "title": "AnswerCorrectnessScoringFnParams" }, "AnswerRelevancyScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "answer_relevancy", "default": "answer_relevancy" + }, + "answer_relevancy": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "answer_relevancy" ], "title": "AnswerRelevancyScoringFnParams" }, "AnswerSimilarityScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "answer_similarity", "default": "answer_similarity" + }, + "answer_similarity": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "answer_similarity" ], "title": "AnswerSimilarityScoringFnParams" }, @@ -6505,150 +6529,234 @@ "ContextEntityRecallScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "context_entity_recall", "default": "context_entity_recall" + }, + "context_entity_recall": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "context_entity_recall" ], "title": "ContextEntityRecallScoringFnParams" }, "ContextPrecisionScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "context_precision", "default": "context_precision" + }, + "context_precision": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "context_precision" ], "title": "ContextPrecisionScoringFnParams" }, "ContextRecallScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "context_recall", "default": "context_recall" + }, + "context_recall": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "context_recall" ], "title": "ContextRecallScoringFnParams" }, "ContextRelevancyScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "context_relevancy", "default": "context_relevancy" + }, + "context_relevancy": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "context_relevancy" ], "title": "ContextRelevancyScoringFnParams" }, + "CustomLLMAsJudgeScoringFnParams": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "custom_llm_as_judge", + "default": "custom_llm_as_judge" + }, + "custom_llm_as_judge": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "custom_llm_as_judge", + "default": "custom_llm_as_judge" + }, + "judge_model": { + "type": "string" + }, + "prompt_template": { + "type": "string" + }, + "judge_score_regexes": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "type", + "judge_model" + ], + "title": "CustomLLMAsJudgeScoringFnParamsFields" + } + }, + "additionalProperties": false, + "required": [ + "type", + "custom_llm_as_judge" + ], + "title": "CustomLLMAsJudgeScoringFnParams" + }, "EqualityScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "equality", "default": "equality" + }, + "equality": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "equality" ], "title": "EqualityScoringFnParams" }, @@ -6672,104 +6780,78 @@ "FactualityScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "factuality", "default": "factuality" + }, + "factuality": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "factuality" ], "title": "FactualityScoringFnParams" }, "FaithfulnessScoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "faithfulness", "default": "faithfulness" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "FaithfulnessScoringFnParams" - }, - "LLMAsJudgeScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "custom_llm_as_judge", - "default": "custom_llm_as_judge" }, - "judge_model": { - "type": "string" - }, - "prompt_template": { - "type": "string" - }, - "judge_score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - } + "faithfulness": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ "type", - "judge_model" + "faithfulness" ], - "title": "LLMAsJudgeScoringFnParams" + "title": "FaithfulnessScoringFnParams" }, "ModelCandidate": { "type": "object", @@ -6804,83 +6886,103 @@ "RegexParserMathScoringFnParams": { "type": "object", "properties": { - "parsing_regexes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "(Optional) Regexes to extract the answer from generated response." - }, - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "regex_parser_math_response", "default": "regex_parser_math_response" + }, + "regex_parser_math_response": { + "type": "object", + "properties": { + "parsing_regexes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "(Optional) Regexes to extract the answer from generated response." + }, + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "required": [ + "parsing_regexes" + ], + "title": "RegexParserScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "parsing_regexes", - "type" + "type", + "regex_parser_math_response" ], "title": "RegexParserMathScoringFnParams" }, "RegexParserScoringFnParams": { "type": "object", "properties": { - "parsing_regexes": { - "type": "array", - "items": { - "type": "string" - }, - "description": "(Optional) Regexes to extract the answer from generated response." - }, - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "regex_parser", "default": "regex_parser" + }, + "regex_parser": { + "type": "object", + "properties": { + "parsing_regexes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "(Optional) Regexes to extract the answer from generated response." + }, + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "required": [ + "parsing_regexes" + ], + "title": "RegexParserScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "parsing_regexes", - "type" + "type", + "regex_parser" ], "title": "RegexParserScoringFnParams" }, "ScoringFnParams": { "oneOf": [ { - "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" + "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams" }, { "$ref": "#/components/schemas/RegexParserScoringFnParams" @@ -6925,7 +7027,7 @@ "discriminator": { "propertyName": "type", "mapping": { - "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", + "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams", "regex_parser": "#/components/schemas/RegexParserScoringFnParams", "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams", "equality": "#/components/schemas/EqualityScoringFnParams", @@ -6945,30 +7047,38 @@ "SubsetOfcoringFnParams": { "type": "object", "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - }, - "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." - }, "type": { "type": "string", "const": "subset_of", "default": "subset_of" + }, + "subset_of": { + "type": "object", + "properties": { + "aggregation_functions": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "average", + "median", + "categorical_count", + "accuracy" + ], + "title": "AggregationFunctionType", + "description": "A type of aggregation function." + }, + "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." + } + }, + "additionalProperties": false, + "title": "BasicScoringFnParamsFields" } }, "additionalProperties": false, "required": [ - "type" + "type", + "subset_of" ], "title": "SubsetOfcoringFnParams" }, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 80516221d..a21860766 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -4451,77 +4451,95 @@ components: AnswerCorrectnessScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: answer_correctness default: answer_correctness + answer_correctness: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - answer_correctness title: AnswerCorrectnessScoringFnParams AnswerRelevancyScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: answer_relevancy default: answer_relevancy + answer_relevancy: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - answer_relevancy title: AnswerRelevancyScoringFnParams AnswerSimilarityScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: answer_similarity default: answer_similarity + answer_similarity: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - answer_similarity title: AnswerSimilarityScoringFnParams BenchmarkConfig: type: object @@ -4551,127 +4569,189 @@ components: ContextEntityRecallScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: context_entity_recall default: context_entity_recall + context_entity_recall: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - context_entity_recall title: ContextEntityRecallScoringFnParams ContextPrecisionScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: context_precision default: context_precision + context_precision: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - context_precision title: ContextPrecisionScoringFnParams ContextRecallScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: context_recall default: context_recall + context_recall: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - context_recall title: ContextRecallScoringFnParams ContextRelevancyScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: context_relevancy default: context_relevancy + context_relevancy: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - context_relevancy title: ContextRelevancyScoringFnParams + CustomLLMAsJudgeScoringFnParams: + type: object + properties: + type: + type: string + const: custom_llm_as_judge + default: custom_llm_as_judge + custom_llm_as_judge: + type: object + properties: + type: + type: string + const: custom_llm_as_judge + default: custom_llm_as_judge + judge_model: + type: string + prompt_template: + type: string + judge_score_regexes: + type: array + items: + type: string + additionalProperties: false + required: + - type + - judge_model + title: CustomLLMAsJudgeScoringFnParamsFields + additionalProperties: false + required: + - type + - custom_llm_as_judge + title: CustomLLMAsJudgeScoringFnParams EqualityScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: equality default: equality + equality: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - equality title: EqualityScoringFnParams EvalCandidate: oneOf: @@ -4685,84 +4765,65 @@ components: FactualityScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: factuality default: factuality + factuality: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - factuality title: FactualityScoringFnParams FaithfulnessScoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: faithfulness default: faithfulness + faithfulness: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - faithfulness title: FaithfulnessScoringFnParams - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - type: string - const: custom_llm_as_judge - default: custom_llm_as_judge - judge_model: - type: string - prompt_template: - type: string - judge_score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - type - - judge_model - title: LLMAsJudgeScoringFnParams ModelCandidate: type: object properties: @@ -4791,70 +4852,84 @@ components: RegexParserMathScoringFnParams: type: object properties: - parsing_regexes: - type: array - items: - type: string - description: >- - (Optional) Regexes to extract the answer from generated response. - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: regex_parser_math_response default: regex_parser_math_response + regex_parser_math_response: + type: object + properties: + parsing_regexes: + type: array + items: + type: string + description: >- + (Optional) Regexes to extract the answer from generated response. + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + required: + - parsing_regexes + title: RegexParserScoringFnParamsFields additionalProperties: false required: - - parsing_regexes - type + - regex_parser_math_response title: RegexParserMathScoringFnParams RegexParserScoringFnParams: type: object properties: - parsing_regexes: - type: array - items: - type: string - description: >- - (Optional) Regexes to extract the answer from generated response. - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: regex_parser default: regex_parser + regex_parser: + type: object + properties: + parsing_regexes: + type: array + items: + type: string + description: >- + (Optional) Regexes to extract the answer from generated response. + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + required: + - parsing_regexes + title: RegexParserScoringFnParamsFields additionalProperties: false required: - - parsing_regexes - type + - regex_parser title: RegexParserScoringFnParams ScoringFnParams: oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' + - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams' - $ref: '#/components/schemas/RegexParserScoringFnParams' - $ref: '#/components/schemas/RegexParserMathScoringFnParams' - $ref: '#/components/schemas/EqualityScoringFnParams' @@ -4871,7 +4946,7 @@ components: discriminator: propertyName: type mapping: - custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' + custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams' regex_parser: '#/components/schemas/RegexParserScoringFnParams' regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams' equality: '#/components/schemas/EqualityScoringFnParams' @@ -4888,27 +4963,33 @@ components: SubsetOfcoringFnParams: type: object properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - description: >- - (Optional) Aggregation functions to apply to the scores of each row. If - not provided, no aggregation will be performed. type: type: string const: subset_of default: subset_of + subset_of: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. + description: >- + (Optional) Aggregation functions to apply to the scores of each row. + If not provided, no aggregation will be performed. + additionalProperties: false + title: BasicScoringFnParamsFields additionalProperties: false required: - type + - subset_of title: SubsetOfcoringFnParams EvaluateRowsRequest: type: object diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 35c0dc9d1..6546e71f9 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -67,7 +67,7 @@ class AggregationFunctionType(Enum): accuracy = "accuracy" -class BasicScoringFnParamsCommon(BaseModel): +class BasicScoringFnParamsFields(BaseModel): """ :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. """ @@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel): ) -class RegexParserScoringFnParamsCommon(BaseModel): +class RegexParserScoringFnParamsFields(BaseModel): """ :param parsing_regexes: (Optional) Regexes to extract the answer from generated response. :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed. @@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel): default_factory=list, ) - -@json_schema_type -class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon): - type: Literal["regex_parser"] = "regex_parser" - - -@json_schema_type -class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon): - type: Literal["regex_parser_math_response"] = "regex_parser_math_response" - - -@json_schema_type -class EqualityScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["equality"] = "equality" - - -@json_schema_type -class SubsetOfcoringFnParams(BasicScoringFnParamsCommon): - type: Literal["subset_of"] = "subset_of" - - -@json_schema_type -class FactualityScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["factuality"] = "factuality" - - -@json_schema_type -class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["faithfulness"] = "faithfulness" - - -@json_schema_type -class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["answer_correctness"] = "answer_correctness" - - -@json_schema_type -class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["answer_relevancy"] = "answer_relevancy" - - -@json_schema_type -class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["answer_similarity"] = "answer_similarity" - - -@json_schema_type -class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["context_entity_recall"] = "context_entity_recall" - - -@json_schema_type -class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["context_precision"] = "context_precision" - - -@json_schema_type -class ContextRecallScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["context_recall"] = "context_recall" - - -@json_schema_type -class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon): - type: Literal["context_relevancy"] = "context_relevancy" - - -@json_schema_type -class LLMAsJudgeScoringFnParams(BaseModel): +class CustomLLMAsJudgeScoringFnParamsFields(BaseModel): type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" judge_model: str prompt_template: Optional[str] = None @@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel): description="Regexes to extract the answer from generated response", default_factory=list, ) - aggregation_functions: Optional[List[AggregationFunctionType]] = Field( - description="Aggregation functions to apply to the scores of each row", - default_factory=list, - ) + +@json_schema_type +class RegexParserScoringFnParams(BaseModel): + type: Literal["regex_parser"] = "regex_parser" + regex_parser: RegexParserScoringFnParamsFields + + +@json_schema_type +class RegexParserMathScoringFnParams(BaseModel): + type: Literal["regex_parser_math_response"] = "regex_parser_math_response" + regex_parser_math_response: RegexParserScoringFnParamsFields + +@json_schema_type +class EqualityScoringFnParams(BaseModel): + type: Literal["equality"] = "equality" + equality: BasicScoringFnParamsFields + +@json_schema_type +class SubsetOfcoringFnParams(BaseModel): + type: Literal["subset_of"] = "subset_of" + subset_of: BasicScoringFnParamsFields + +@json_schema_type +class FactualityScoringFnParams(BaseModel): + type: Literal["factuality"] = "factuality" + factuality: BasicScoringFnParamsFields + +@json_schema_type +class FaithfulnessScoringFnParams(BaseModel): + type: Literal["faithfulness"] = "faithfulness" + faithfulness: BasicScoringFnParamsFields + +@json_schema_type +class AnswerCorrectnessScoringFnParams(BaseModel): + type: Literal["answer_correctness"] = "answer_correctness" + answer_correctness: BasicScoringFnParamsFields + +@json_schema_type +class AnswerRelevancyScoringFnParams(BaseModel): + type: Literal["answer_relevancy"] = "answer_relevancy" + answer_relevancy: BasicScoringFnParamsFields + +@json_schema_type +class AnswerSimilarityScoringFnParams(BaseModel): + type: Literal["answer_similarity"] = "answer_similarity" + answer_similarity: BasicScoringFnParamsFields + +@json_schema_type +class ContextEntityRecallScoringFnParams(BaseModel): + type: Literal["context_entity_recall"] = "context_entity_recall" + context_entity_recall: BasicScoringFnParamsFields + +@json_schema_type +class ContextPrecisionScoringFnParams(BaseModel): + type: Literal["context_precision"] = "context_precision" + context_precision: BasicScoringFnParamsFields + +@json_schema_type +class ContextRecallScoringFnParams(BaseModel): + type: Literal["context_recall"] = "context_recall" + context_recall: BasicScoringFnParamsFields + +@json_schema_type +class ContextRelevancyScoringFnParams(BaseModel): + type: Literal["context_relevancy"] = "context_relevancy" + context_relevancy: BasicScoringFnParamsFields + + +@json_schema_type +class CustomLLMAsJudgeScoringFnParams(BaseModel): + type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" + custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields ScoringFnParams = register_schema( Annotated[ Union[ - LLMAsJudgeScoringFnParams, + CustomLLMAsJudgeScoringFnParams, RegexParserScoringFnParams, RegexParserMathScoringFnParams, EqualityScoringFnParams,