better params fields

2026-01-03 20:52:14 +00:00 · 2025-03-12 21:31:22 -07:00 · 2025-03-12 21:31:22 -07:00 · a7abe6df74
commit a7abe6df74
parent 93c131ed5f
3 changed files with 763 additions and 571 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -6386,6 +6386,14 @@
                "description": "An agent candidate for evaluation."
            },
            "AnswerCorrectnessScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "answer_correctness",
+                        "default": "answer_correctness"
+                    },
+                    "answer_correctness": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6402,50 +6410,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "answer_correctness",
-                        "default": "answer_correctness"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "answer_correctness"
                ],
                "title": "AnswerCorrectnessScoringFnParams"
            },
            "AnswerRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
-                    "aggregation_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "average",
-                                "median",
-                                "categorical_count",
-                                "accuracy"
-                            ],
-                            "title": "AggregationFunctionType",
-                            "description": "A type of aggregation function."
-                        },
-                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                    },
                    "type": {
                        "type": "string",
                        "const": "answer_relevancy",
                        "default": "answer_relevancy"
-                    }
                    },
-                "additionalProperties": false,
-                "required": [
-                    "type"
-                ],
-                "title": "AnswerRelevancyScoringFnParams"
-            },
-            "AnswerSimilarityScoringFnParams": {
+                    "answer_relevancy": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6462,16 +6448,54 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "answer_similarity",
-                        "default": "answer_similarity"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "answer_relevancy"
+                ],
+                "title": "AnswerRelevancyScoringFnParams"
+            },
+            "AnswerSimilarityScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "answer_similarity",
+                        "default": "answer_similarity"
+                    },
+                    "answer_similarity": {
+                        "type": "object",
+                        "properties": {
+                            "aggregation_functions": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string",
+                                    "enum": [
+                                        "average",
+                                        "median",
+                                        "categorical_count",
+                                        "accuracy"
+                                    ],
+                                    "title": "AggregationFunctionType",
+                                    "description": "A type of aggregation function."
+                                },
+                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
+                        },
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "answer_similarity"
                ],
                "title": "AnswerSimilarityScoringFnParams"
            },
@ -6503,6 +6527,14 @@
                "description": "A benchmark configuration for evaluation."
            },
            "ContextEntityRecallScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "context_entity_recall",
+                        "default": "context_entity_recall"
+                    },
+                    "context_entity_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6519,20 +6551,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "context_entity_recall",
-                        "default": "context_entity_recall"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "context_entity_recall"
                ],
                "title": "ContextEntityRecallScoringFnParams"
            },
            "ContextPrecisionScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "context_precision",
+                        "default": "context_precision"
+                    },
+                    "context_precision": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6549,20 +6589,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "context_precision",
-                        "default": "context_precision"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "context_precision"
                ],
                "title": "ContextPrecisionScoringFnParams"
            },
            "ContextRecallScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "context_recall",
+                        "default": "context_recall"
+                    },
+                    "context_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6579,50 +6627,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "context_recall",
-                        "default": "context_recall"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "context_recall"
                ],
                "title": "ContextRecallScoringFnParams"
            },
            "ContextRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
-                    "aggregation_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "average",
-                                "median",
-                                "categorical_count",
-                                "accuracy"
-                            ],
-                            "title": "AggregationFunctionType",
-                            "description": "A type of aggregation function."
-                        },
-                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                    },
                    "type": {
                        "type": "string",
                        "const": "context_relevancy",
                        "default": "context_relevancy"
-                    }
                    },
-                "additionalProperties": false,
-                "required": [
-                    "type"
-                ],
-                "title": "ContextRelevancyScoringFnParams"
-            },
-            "EqualityScoringFnParams": {
+                    "context_relevancy": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6639,16 +6665,98 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "equality",
-                        "default": "equality"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "context_relevancy"
+                ],
+                "title": "ContextRelevancyScoringFnParams"
+            },
+            "CustomLLMAsJudgeScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "custom_llm_as_judge",
+                        "default": "custom_llm_as_judge"
+                    },
+                    "custom_llm_as_judge": {
+                        "type": "object",
+                        "properties": {
+                            "type": {
+                                "type": "string",
+                                "const": "custom_llm_as_judge",
+                                "default": "custom_llm_as_judge"
+                            },
+                            "judge_model": {
+                                "type": "string"
+                            },
+                            "prompt_template": {
+                                "type": "string"
+                            },
+                            "judge_score_regexes": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                }
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "type",
+                            "judge_model"
+                        ],
+                        "title": "CustomLLMAsJudgeScoringFnParamsFields"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "custom_llm_as_judge"
+                ],
+                "title": "CustomLLMAsJudgeScoringFnParams"
+            },
+            "EqualityScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "equality",
+                        "default": "equality"
+                    },
+                    "equality": {
+                        "type": "object",
+                        "properties": {
+                            "aggregation_functions": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string",
+                                    "enum": [
+                                        "average",
+                                        "median",
+                                        "categorical_count",
+                                        "accuracy"
+                                    ],
+                                    "title": "AggregationFunctionType",
+                                    "description": "A type of aggregation function."
+                                },
+                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
+                        },
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "equality"
                ],
                "title": "EqualityScoringFnParams"
            },
@ -6670,6 +6778,14 @@
                }
            },
            "FactualityScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "factuality",
+                        "default": "factuality"
+                    },
+                    "factuality": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6686,20 +6802,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "factuality",
-                        "default": "factuality"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "factuality"
                ],
                "title": "FactualityScoringFnParams"
            },
            "FaithfulnessScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "faithfulness",
+                        "default": "faithfulness"
+                    },
+                    "faithfulness": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6716,60 +6840,18 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                    },
-                    "type": {
-                        "type": "string",
-                        "const": "faithfulness",
-                        "default": "faithfulness"
                            }
                        },
                        "additionalProperties": false,
-                "required": [
-                    "type"
-                ],
-                "title": "FaithfulnessScoringFnParams"
-            },
-            "LLMAsJudgeScoringFnParams": {
-                "type": "object",
-                "properties": {
-                    "type": {
-                        "type": "string",
-                        "const": "custom_llm_as_judge",
-                        "default": "custom_llm_as_judge"
-                    },
-                    "judge_model": {
-                        "type": "string"
-                    },
-                    "prompt_template": {
-                        "type": "string"
-                    },
-                    "judge_score_regexes": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        }
-                    },
-                    "aggregation_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "average",
-                                "median",
-                                "categorical_count",
-                                "accuracy"
-                            ],
-                            "title": "AggregationFunctionType",
-                            "description": "A type of aggregation function."
-                        }
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
-                    "judge_model"
+                    "faithfulness"
                ],
-                "title": "LLMAsJudgeScoringFnParams"
+                "title": "FaithfulnessScoringFnParams"
            },
            "ModelCandidate": {
                "type": "object",
@ -6804,42 +6886,12 @@
            "RegexParserMathScoringFnParams": {
                "type": "object",
                "properties": {
-                    "parsing_regexes": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        },
-                        "description": "(Optional) Regexes to extract the answer from generated response."
-                    },
-                    "aggregation_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "average",
-                                "median",
-                                "categorical_count",
-                                "accuracy"
-                            ],
-                            "title": "AggregationFunctionType",
-                            "description": "A type of aggregation function."
-                        },
-                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                    },
                    "type": {
                        "type": "string",
                        "const": "regex_parser_math_response",
                        "default": "regex_parser_math_response"
-                    }
                    },
-                "additionalProperties": false,
-                "required": [
-                    "parsing_regexes",
-                    "type"
-                ],
-                "title": "RegexParserMathScoringFnParams"
-            },
-            "RegexParserScoringFnParams": {
+                    "regex_parser_math_response": {
                        "type": "object",
                        "properties": {
                            "parsing_regexes": {
@ -6863,24 +6915,74 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                    },
-                    "type": {
-                        "type": "string",
-                        "const": "regex_parser",
-                        "default": "regex_parser"
                            }
                        },
                        "additionalProperties": false,
                        "required": [
-                    "parsing_regexes",
-                    "type"
+                            "parsing_regexes"
+                        ],
+                        "title": "RegexParserScoringFnParamsFields"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "regex_parser_math_response"
+                ],
+                "title": "RegexParserMathScoringFnParams"
+            },
+            "RegexParserScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "regex_parser",
+                        "default": "regex_parser"
+                    },
+                    "regex_parser": {
+                        "type": "object",
+                        "properties": {
+                            "parsing_regexes": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string"
+                                },
+                                "description": "(Optional) Regexes to extract the answer from generated response."
+                            },
+                            "aggregation_functions": {
+                                "type": "array",
+                                "items": {
+                                    "type": "string",
+                                    "enum": [
+                                        "average",
+                                        "median",
+                                        "categorical_count",
+                                        "accuracy"
+                                    ],
+                                    "title": "AggregationFunctionType",
+                                    "description": "A type of aggregation function."
+                                },
+                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "parsing_regexes"
+                        ],
+                        "title": "RegexParserScoringFnParamsFields"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "regex_parser"
                ],
                "title": "RegexParserScoringFnParams"
            },
            "ScoringFnParams": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                        "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
                    },
                    {
                        "$ref": "#/components/schemas/RegexParserScoringFnParams"
@ -6925,7 +7027,7 @@
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
-                        "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
+                        "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
                        "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
                        "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
                        "equality": "#/components/schemas/EqualityScoringFnParams",
@ -6943,6 +7045,14 @@
                }
            },
            "SubsetOfcoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "subset_of",
+                        "default": "subset_of"
+                    },
+                    "subset_of": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6959,16 +7069,16 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
+                            }
                        },
-                    "type": {
-                        "type": "string",
-                        "const": "subset_of",
-                        "default": "subset_of"
+                        "additionalProperties": false,
+                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
+                    "subset_of"
                ],
                "title": "SubsetOfcoringFnParams"
            },
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -4451,54 +4451,42 @@ components:
    AnswerCorrectnessScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_correctness
          default: answer_correctness
+        answer_correctness:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - answer_correctness
      title: AnswerCorrectnessScoringFnParams
    AnswerRelevancyScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_relevancy
          default: answer_relevancy
-      additionalProperties: false
-      required:
-        - type
-      title: AnswerRelevancyScoringFnParams
-    AnswerSimilarityScoringFnParams:
+        answer_relevancy:
          type: object
          properties:
            aggregation_functions:
@ -4513,15 +4501,45 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
+      additionalProperties: false
+      required:
+        - type
+        - answer_relevancy
+      title: AnswerRelevancyScoringFnParams
+    AnswerSimilarityScoringFnParams:
+      type: object
+      properties:
        type:
          type: string
          const: answer_similarity
          default: answer_similarity
+        answer_similarity:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - answer_similarity
      title: AnswerSimilarityScoringFnParams
    BenchmarkConfig:
      type: object
@ -4551,188 +4569,135 @@ components:
    ContextEntityRecallScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_entity_recall
          default: context_entity_recall
+        context_entity_recall:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - context_entity_recall
      title: ContextEntityRecallScoringFnParams
    ContextPrecisionScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_precision
          default: context_precision
+        context_precision:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - context_precision
      title: ContextPrecisionScoringFnParams
    ContextRecallScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_recall
          default: context_recall
+        context_recall:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - context_recall
      title: ContextRecallScoringFnParams
    ContextRelevancyScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_relevancy
          default: context_relevancy
+        context_relevancy:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - context_relevancy
      title: ContextRelevancyScoringFnParams
-    EqualityScoringFnParams:
+    CustomLLMAsJudgeScoringFnParams:
      type: object
      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
-          const: equality
-          default: equality
-      additionalProperties: false
-      required:
-        - type
-      title: EqualityScoringFnParams
-    EvalCandidate:
-      oneOf:
-        - $ref: '#/components/schemas/ModelCandidate'
-        - $ref: '#/components/schemas/AgentCandidate'
-      discriminator:
-        propertyName: type
-        mapping:
-          model: '#/components/schemas/ModelCandidate'
-          agent: '#/components/schemas/AgentCandidate'
-    FactualityScoringFnParams:
-      type: object
-      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
-        type:
-          type: string
-          const: factuality
-          default: factuality
-      additionalProperties: false
-      required:
-        - type
-      title: FactualityScoringFnParams
-    FaithfulnessScoringFnParams:
-      type: object
-      properties:
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
-        type:
-          type: string
-          const: faithfulness
-          default: faithfulness
-      additionalProperties: false
-      required:
-        - type
-      title: FaithfulnessScoringFnParams
-    LLMAsJudgeScoringFnParams:
+          const: custom_llm_as_judge
+          default: custom_llm_as_judge
+        custom_llm_as_judge:
          type: object
          properties:
            type:
@ -4747,6 +4712,26 @@ components:
              type: array
              items:
                type: string
+          additionalProperties: false
+          required:
+            - type
+            - judge_model
+          title: CustomLLMAsJudgeScoringFnParamsFields
+      additionalProperties: false
+      required:
+        - type
+        - custom_llm_as_judge
+      title: CustomLLMAsJudgeScoringFnParams
+    EqualityScoringFnParams:
+      type: object
+      properties:
+        type:
+          type: string
+          const: equality
+          default: equality
+        equality:
+          type: object
+          properties:
            aggregation_functions:
              type: array
              items:
@ -4758,11 +4743,87 @@ components:
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
-        - judge_model
-      title: LLMAsJudgeScoringFnParams
+        - equality
+      title: EqualityScoringFnParams
+    EvalCandidate:
+      oneOf:
+        - $ref: '#/components/schemas/ModelCandidate'
+        - $ref: '#/components/schemas/AgentCandidate'
+      discriminator:
+        propertyName: type
+        mapping:
+          model: '#/components/schemas/ModelCandidate'
+          agent: '#/components/schemas/AgentCandidate'
+    FactualityScoringFnParams:
+      type: object
+      properties:
+        type:
+          type: string
+          const: factuality
+          default: factuality
+        factuality:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
+      additionalProperties: false
+      required:
+        - type
+        - factuality
+      title: FactualityScoringFnParams
+    FaithfulnessScoringFnParams:
+      type: object
+      properties:
+        type:
+          type: string
+          const: faithfulness
+          default: faithfulness
+        faithfulness:
+          type: object
+          properties:
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
+      additionalProperties: false
+      required:
+        - type
+        - faithfulness
+      title: FaithfulnessScoringFnParams
    ModelCandidate:
      type: object
      properties:
@ -4791,36 +4852,11 @@ components:
    RegexParserMathScoringFnParams:
      type: object
      properties:
-        parsing_regexes:
-          type: array
-          items:
-            type: string
-          description: >-
-            (Optional) Regexes to extract the answer from generated response.
-        aggregation_functions:
-          type: array
-          items:
-            type: string
-            enum:
-              - average
-              - median
-              - categorical_count
-              - accuracy
-            title: AggregationFunctionType
-            description: A type of aggregation function.
-          description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
        type:
          type: string
          const: regex_parser_math_response
          default: regex_parser_math_response
-      additionalProperties: false
-      required:
-        - parsing_regexes
-        - type
-      title: RegexParserMathScoringFnParams
-    RegexParserScoringFnParams:
+        regex_parser_math_response:
          type: object
          properties:
            parsing_regexes:
@ -4841,20 +4877,59 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          required:
+            - parsing_regexes
+          title: RegexParserScoringFnParamsFields
+      additionalProperties: false
+      required:
+        - type
+        - regex_parser_math_response
+      title: RegexParserMathScoringFnParams
+    RegexParserScoringFnParams:
+      type: object
+      properties:
        type:
          type: string
          const: regex_parser
          default: regex_parser
+        regex_parser:
+          type: object
+          properties:
+            parsing_regexes:
+              type: array
+              items:
+                type: string
+              description: >-
+                (Optional) Regexes to extract the answer from generated response.
+            aggregation_functions:
+              type: array
+              items:
+                type: string
+                enum:
+                  - average
+                  - median
+                  - categorical_count
+                  - accuracy
+                title: AggregationFunctionType
+                description: A type of aggregation function.
+              description: >-
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
          additionalProperties: false
          required:
            - parsing_regexes
+          title: RegexParserScoringFnParamsFields
+      additionalProperties: false
+      required:
        - type
+        - regex_parser
      title: RegexParserScoringFnParams
    ScoringFnParams:
      oneOf:
-        - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
        - $ref: '#/components/schemas/RegexParserScoringFnParams'
        - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
        - $ref: '#/components/schemas/EqualityScoringFnParams'
@ -4871,7 +4946,7 @@ components:
      discriminator:
        propertyName: type
        mapping:
-          custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+          custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
          regex_parser: '#/components/schemas/RegexParserScoringFnParams'
          regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
          equality: '#/components/schemas/EqualityScoringFnParams'
@ -4886,6 +4961,13 @@ components:
          context_recall: '#/components/schemas/ContextRecallScoringFnParams'
          context_relevancy: '#/components/schemas/ContextRelevancyScoringFnParams'
    SubsetOfcoringFnParams:
+      type: object
+      properties:
+        type:
+          type: string
+          const: subset_of
+          default: subset_of
+        subset_of:
          type: object
          properties:
            aggregation_functions:
@ -4900,15 +4982,14 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
-            not provided, no aggregation will be performed.
-        type:
-          type: string
-          const: subset_of
-          default: subset_of
+                (Optional) Aggregation functions to apply to the scores of each row.
+                If not provided, no aggregation will be performed.
+          additionalProperties: false
+          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
+        - subset_of
      title: SubsetOfcoringFnParams
    EvaluateRowsRequest:
      type: object
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
    accuracy = "accuracy"


-class BasicScoringFnParamsCommon(BaseModel):
+class BasicScoringFnParamsFields(BaseModel):
    """
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
    """
@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
    )


-class RegexParserScoringFnParamsCommon(BaseModel):
+class RegexParserScoringFnParamsFields(BaseModel):
    """
    :param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
        default_factory=list,
    )

-
-@json_schema_type
-class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
-    type: Literal["regex_parser"] = "regex_parser"
-
-
-@json_schema_type
-class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
-    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
-
-
-@json_schema_type
-class EqualityScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["equality"] = "equality"
-
-
-@json_schema_type
-class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["subset_of"] = "subset_of"
-
-
-@json_schema_type
-class FactualityScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["factuality"] = "factuality"
-
-
-@json_schema_type
-class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["faithfulness"] = "faithfulness"
-
-
-@json_schema_type
-class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["answer_correctness"] = "answer_correctness"
-
-
-@json_schema_type
-class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["answer_relevancy"] = "answer_relevancy"
-
-
-@json_schema_type
-class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["answer_similarity"] = "answer_similarity"
-
-
-@json_schema_type
-class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["context_entity_recall"] = "context_entity_recall"
-
-
-@json_schema_type
-class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["context_precision"] = "context_precision"
-
-
-@json_schema_type
-class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["context_recall"] = "context_recall"
-
-
-@json_schema_type
-class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
-    type: Literal["context_relevancy"] = "context_relevancy"
-
-
-@json_schema_type
-class LLMAsJudgeScoringFnParams(BaseModel):
+class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
    judge_model: str
    prompt_template: Optional[str] = None
@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
        description="Regexes to extract the answer from generated response",
        default_factory=list,
    )
-    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
-        description="Aggregation functions to apply to the scores of each row",
-        default_factory=list,
-    )
+
+@json_schema_type
+class RegexParserScoringFnParams(BaseModel):
+    type: Literal["regex_parser"] = "regex_parser"
+    regex_parser: RegexParserScoringFnParamsFields
+
+
+@json_schema_type
+class RegexParserMathScoringFnParams(BaseModel):
+    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
+    regex_parser_math_response: RegexParserScoringFnParamsFields
+
+@json_schema_type
+class EqualityScoringFnParams(BaseModel):
+    type: Literal["equality"] = "equality"
+    equality: BasicScoringFnParamsFields
+
+@json_schema_type
+class SubsetOfcoringFnParams(BaseModel):
+    type: Literal["subset_of"] = "subset_of"
+    subset_of: BasicScoringFnParamsFields
+
+@json_schema_type
+class FactualityScoringFnParams(BaseModel):
+    type: Literal["factuality"] = "factuality"
+    factuality: BasicScoringFnParamsFields
+
+@json_schema_type
+class FaithfulnessScoringFnParams(BaseModel):
+    type: Literal["faithfulness"] = "faithfulness"
+    faithfulness: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerCorrectnessScoringFnParams(BaseModel):
+    type: Literal["answer_correctness"] = "answer_correctness"
+    answer_correctness: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerRelevancyScoringFnParams(BaseModel):
+    type: Literal["answer_relevancy"] = "answer_relevancy"
+    answer_relevancy: BasicScoringFnParamsFields
+
+@json_schema_type
+class AnswerSimilarityScoringFnParams(BaseModel):
+    type: Literal["answer_similarity"] = "answer_similarity"
+    answer_similarity: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextEntityRecallScoringFnParams(BaseModel):
+    type: Literal["context_entity_recall"] = "context_entity_recall"
+    context_entity_recall: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextPrecisionScoringFnParams(BaseModel):
+    type: Literal["context_precision"] = "context_precision"
+    context_precision: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextRecallScoringFnParams(BaseModel):
+    type: Literal["context_recall"] = "context_recall"
+    context_recall: BasicScoringFnParamsFields
+
+@json_schema_type
+class ContextRelevancyScoringFnParams(BaseModel):
+    type: Literal["context_relevancy"] = "context_relevancy"
+    context_relevancy: BasicScoringFnParamsFields
+
+
+@json_schema_type
+class CustomLLMAsJudgeScoringFnParams(BaseModel):
+    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
+    custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields


 ScoringFnParams = register_schema(
    Annotated[
        Union[
-            LLMAsJudgeScoringFnParams,
+            CustomLLMAsJudgeScoringFnParams,
            RegexParserScoringFnParams,
            RegexParserMathScoringFnParams,
            EqualityScoringFnParams,