better params fields

2026-01-06 09:09:57 +00:00 · 2025-03-12 21:31:22 -07:00 · 2025-03-12 21:31:22 -07:00 · a7abe6df74
commit a7abe6df74
parent 93c131ed5f
3 changed files with 763 additions and 571 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -6386,6 +6386,14 @@
                "description": "An agent candidate for evaluation."
            },
            "AnswerCorrectnessScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "answer_correctness",
                        "default": "answer_correctness"
                    },
                    "answer_correctness": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6402,50 +6410,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "answer_correctness",
                        "default": "answer_correctness"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "answer_correctness"
                ],
                "title": "AnswerCorrectnessScoringFnParams"
            },
            "AnswerRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "answer_relevancy",
                        "default": "answer_relevancy"
                    }
                    },
-                "additionalProperties": false,
+                    "answer_relevancy": {
                "required": [
                    "type"
                ],
                "title": "AnswerRelevancyScoringFnParams"
            },
            "AnswerSimilarityScoringFnParams": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6462,16 +6448,54 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "answer_similarity",
                        "default": "answer_similarity"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "answer_relevancy"
                ],
                "title": "AnswerRelevancyScoringFnParams"
            },
            "AnswerSimilarityScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "answer_similarity",
                        "default": "answer_similarity"
                    },
                    "answer_similarity": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "answer_similarity"
                ],
                "title": "AnswerSimilarityScoringFnParams"
            },
@ -6503,6 +6527,14 @@
                "description": "A benchmark configuration for evaluation."
            },
            "ContextEntityRecallScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "context_entity_recall",
                        "default": "context_entity_recall"
                    },
                    "context_entity_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6519,20 +6551,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "context_entity_recall",
                        "default": "context_entity_recall"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_entity_recall"
                ],
                "title": "ContextEntityRecallScoringFnParams"
            },
            "ContextPrecisionScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "context_precision",
                        "default": "context_precision"
                    },
                    "context_precision": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6549,20 +6589,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "context_precision",
                        "default": "context_precision"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_precision"
                ],
                "title": "ContextPrecisionScoringFnParams"
            },
            "ContextRecallScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "context_recall",
                        "default": "context_recall"
                    },
                    "context_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6579,50 +6627,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "context_recall",
                        "default": "context_recall"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_recall"
                ],
                "title": "ContextRecallScoringFnParams"
            },
            "ContextRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "context_relevancy",
                        "default": "context_relevancy"
                    }
                    },
-                "additionalProperties": false,
+                    "context_relevancy": {
                "required": [
                    "type"
                ],
                "title": "ContextRelevancyScoringFnParams"
            },
            "EqualityScoringFnParams": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6639,16 +6665,98 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "equality",
                        "default": "equality"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_relevancy"
                ],
                "title": "ContextRelevancyScoringFnParams"
            },
            "CustomLLMAsJudgeScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "custom_llm_as_judge",
                        "default": "custom_llm_as_judge"
                    },
                    "custom_llm_as_judge": {
                        "type": "object",
                        "properties": {
                            "type": {
                                "type": "string",
                                "const": "custom_llm_as_judge",
                                "default": "custom_llm_as_judge"
                            },
                            "judge_model": {
                                "type": "string"
                            },
                            "prompt_template": {
                                "type": "string"
                            },
                            "judge_score_regexes": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                }
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "type",
                            "judge_model"
                        ],
                        "title": "CustomLLMAsJudgeScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "custom_llm_as_judge"
                ],
                "title": "CustomLLMAsJudgeScoringFnParams"
            },
            "EqualityScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "equality",
                        "default": "equality"
                    },
                    "equality": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "equality"
                ],
                "title": "EqualityScoringFnParams"
            },
@ -6670,6 +6778,14 @@
                }
            },
            "FactualityScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "factuality",
                        "default": "factuality"
                    },
                    "factuality": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6686,20 +6802,28 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "factuality",
                        "default": "factuality"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "factuality"
                ],
                "title": "FactualityScoringFnParams"
            },
            "FaithfulnessScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "faithfulness",
                        "default": "faithfulness"
                    },
                    "faithfulness": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6716,60 +6840,18 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "faithfulness",
                        "default": "faithfulness"
                            }
                        },
                        "additionalProperties": false,
-                "required": [
+                        "title": "BasicScoringFnParamsFields"
                    "type"
                ],
                "title": "FaithfulnessScoringFnParams"
            },
            "LLMAsJudgeScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "custom_llm_as_judge",
                        "default": "custom_llm_as_judge"
                    },
                    "judge_model": {
                        "type": "string"
                    },
                    "prompt_template": {
                        "type": "string"
                    },
                    "judge_score_regexes": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        }
                    },
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
-                    "judge_model"
+                    "faithfulness"
                ],
-                "title": "LLMAsJudgeScoringFnParams"
+                "title": "FaithfulnessScoringFnParams"
            },
            "ModelCandidate": {
                "type": "object",
@ -6804,42 +6886,12 @@
            "RegexParserMathScoringFnParams": {
                "type": "object",
                "properties": {
                    "parsing_regexes": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        },
                        "description": "(Optional) Regexes to extract the answer from generated response."
                    },
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "regex_parser_math_response",
                        "default": "regex_parser_math_response"
                    }
                    },
-                "additionalProperties": false,
+                    "regex_parser_math_response": {
                "required": [
                    "parsing_regexes",
                    "type"
                ],
                "title": "RegexParserMathScoringFnParams"
            },
            "RegexParserScoringFnParams": {
                        "type": "object",
                        "properties": {
                            "parsing_regexes": {
@ -6863,24 +6915,74 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "regex_parser",
                        "default": "regex_parser"
                            }
                        },
                        "additionalProperties": false,
                        "required": [
-                    "parsing_regexes",
+                            "parsing_regexes"
-                    "type"
+                        ],
                        "title": "RegexParserScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "regex_parser_math_response"
                ],
                "title": "RegexParserMathScoringFnParams"
            },
            "RegexParserScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "regex_parser",
                        "default": "regex_parser"
                    },
                    "regex_parser": {
                        "type": "object",
                        "properties": {
                            "parsing_regexes": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "(Optional) Regexes to extract the answer from generated response."
                            },
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "parsing_regexes"
                        ],
                        "title": "RegexParserScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "regex_parser"
                ],
                "title": "RegexParserScoringFnParams"
            },
            "ScoringFnParams": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                        "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
                    },
                    {
                        "$ref": "#/components/schemas/RegexParserScoringFnParams"
@ -6925,7 +7027,7 @@
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
-                        "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
+                        "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
                        "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
                        "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
                        "equality": "#/components/schemas/EqualityScoringFnParams",
@ -6943,6 +7045,14 @@
                }
            },
            "SubsetOfcoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "subset_of",
                        "default": "subset_of"
                    },
                    "subset_of": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
@ -6959,16 +7069,16 @@
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
-                    "type": {
+                        "additionalProperties": false,
-                        "type": "string",
+                        "title": "BasicScoringFnParamsFields"
                        "const": "subset_of",
                        "default": "subset_of"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "subset_of"
                ],
                "title": "SubsetOfcoringFnParams"
            },
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -4451,54 +4451,42 @@ components:
    AnswerCorrectnessScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_correctness
          default: answer_correctness
        answer_correctness:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_correctness
      title: AnswerCorrectnessScoringFnParams
    AnswerRelevancyScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_relevancy
          default: answer_relevancy
-      additionalProperties: false
+        answer_relevancy:
      required:
        - type
      title: AnswerRelevancyScoringFnParams
    AnswerSimilarityScoringFnParams:
          type: object
          properties:
            aggregation_functions:
@ -4513,15 +4501,45 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
+                (Optional) Aggregation functions to apply to the scores of each row.
-            not provided, no aggregation will be performed.
+                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_relevancy
      title: AnswerRelevancyScoringFnParams
    AnswerSimilarityScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: answer_similarity
          default: answer_similarity
        answer_similarity:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_similarity
      title: AnswerSimilarityScoringFnParams
    BenchmarkConfig:
      type: object
@ -4551,188 +4569,135 @@ components:
    ContextEntityRecallScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_entity_recall
          default: context_entity_recall
        context_entity_recall:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_entity_recall
      title: ContextEntityRecallScoringFnParams
    ContextPrecisionScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_precision
          default: context_precision
        context_precision:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_precision
      title: ContextPrecisionScoringFnParams
    ContextRecallScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_recall
          default: context_recall
        context_recall:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_recall
      title: ContextRecallScoringFnParams
    ContextRelevancyScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_relevancy
          default: context_relevancy
        context_relevancy:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_relevancy
      title: ContextRelevancyScoringFnParams
-    EqualityScoringFnParams:
+    CustomLLMAsJudgeScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
-          const: equality
+          const: custom_llm_as_judge
-          default: equality
+          default: custom_llm_as_judge
-      additionalProperties: false
+        custom_llm_as_judge:
      required:
        - type
      title: EqualityScoringFnParams
    EvalCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
        - $ref: '#/components/schemas/AgentCandidate'
      discriminator:
        propertyName: type
        mapping:
          model: '#/components/schemas/ModelCandidate'
          agent: '#/components/schemas/AgentCandidate'
    FactualityScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: factuality
          default: factuality
      additionalProperties: false
      required:
        - type
      title: FactualityScoringFnParams
    FaithfulnessScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: faithfulness
          default: faithfulness
      additionalProperties: false
      required:
        - type
      title: FaithfulnessScoringFnParams
    LLMAsJudgeScoringFnParams:
          type: object
          properties:
            type:
@ -4747,6 +4712,26 @@ components:
              type: array
              items:
                type: string
          additionalProperties: false
          required:
            - type
            - judge_model
          title: CustomLLMAsJudgeScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - custom_llm_as_judge
      title: CustomLLMAsJudgeScoringFnParams
    EqualityScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: equality
          default: equality
        equality:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
@ -4758,11 +4743,87 @@ components:
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
-        - judge_model
+        - equality
-      title: LLMAsJudgeScoringFnParams
+      title: EqualityScoringFnParams
    EvalCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
        - $ref: '#/components/schemas/AgentCandidate'
      discriminator:
        propertyName: type
        mapping:
          model: '#/components/schemas/ModelCandidate'
          agent: '#/components/schemas/AgentCandidate'
    FactualityScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: factuality
          default: factuality
        factuality:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - factuality
      title: FactualityScoringFnParams
    FaithfulnessScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: faithfulness
          default: faithfulness
        faithfulness:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - faithfulness
      title: FaithfulnessScoringFnParams
    ModelCandidate:
      type: object
      properties:
@ -4791,36 +4852,11 @@ components:
    RegexParserMathScoringFnParams:
      type: object
      properties:
        parsing_regexes:
          type: array
          items:
            type: string
          description: >-
            (Optional) Regexes to extract the answer from generated response.
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: regex_parser_math_response
          default: regex_parser_math_response
-      additionalProperties: false
+        regex_parser_math_response:
      required:
        - parsing_regexes
        - type
      title: RegexParserMathScoringFnParams
    RegexParserScoringFnParams:
          type: object
          properties:
            parsing_regexes:
@ -4841,20 +4877,59 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
+                (Optional) Aggregation functions to apply to the scores of each row.
-            not provided, no aggregation will be performed.
+                If not provided, no aggregation will be performed.
          additionalProperties: false
          required:
            - parsing_regexes
          title: RegexParserScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - regex_parser_math_response
      title: RegexParserMathScoringFnParams
    RegexParserScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: regex_parser
          default: regex_parser
        regex_parser:
          type: object
          properties:
            parsing_regexes:
              type: array
              items:
                type: string
              description: >-
                (Optional) Regexes to extract the answer from generated response.
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          required:
            - parsing_regexes
          title: RegexParserScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - regex_parser
      title: RegexParserScoringFnParams
    ScoringFnParams:
      oneOf:
-        - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
        - $ref: '#/components/schemas/RegexParserScoringFnParams'
        - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
        - $ref: '#/components/schemas/EqualityScoringFnParams'
@ -4871,7 +4946,7 @@ components:
      discriminator:
        propertyName: type
        mapping:
-          custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+          custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
          regex_parser: '#/components/schemas/RegexParserScoringFnParams'
          regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
          equality: '#/components/schemas/EqualityScoringFnParams'
@ -4886,6 +4961,13 @@ components:
          context_recall: '#/components/schemas/ContextRecallScoringFnParams'
          context_relevancy: '#/components/schemas/ContextRelevancyScoringFnParams'
    SubsetOfcoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: subset_of
          default: subset_of
        subset_of:
          type: object
          properties:
            aggregation_functions:
@ -4900,15 +4982,14 @@ components:
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
-            (Optional) Aggregation functions to apply to the scores of each row. If
+                (Optional) Aggregation functions to apply to the scores of each row.
-            not provided, no aggregation will be performed.
+                If not provided, no aggregation will be performed.
-        type:
+          additionalProperties: false
-          type: string
+          title: BasicScoringFnParamsFields
          const: subset_of
          default: subset_of
      additionalProperties: false
      required:
        - type
        - subset_of
      title: SubsetOfcoringFnParams
    EvaluateRowsRequest:
      type: object
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
    accuracy = "accuracy"
-class BasicScoringFnParamsCommon(BaseModel):
+class BasicScoringFnParamsFields(BaseModel):
    """
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
    """
@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
    )
-class RegexParserScoringFnParamsCommon(BaseModel):
+class RegexParserScoringFnParamsFields(BaseModel):
    """
    :param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
        default_factory=list,
    )
-
+class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
@json_schema_type
 class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
    type: Literal["regex_parser"] = "regex_parser"
@json_schema_type
 class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
@json_schema_type
 class EqualityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["equality"] = "equality"
@json_schema_type
 class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["subset_of"] = "subset_of"
@json_schema_type
 class FactualityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["factuality"] = "factuality"
@json_schema_type
 class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["faithfulness"] = "faithfulness"
@json_schema_type
 class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_correctness"] = "answer_correctness"
@json_schema_type
 class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_relevancy"] = "answer_relevancy"
@json_schema_type
 class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_similarity"] = "answer_similarity"
@json_schema_type
 class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_entity_recall"] = "context_entity_recall"
@json_schema_type
 class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_precision"] = "context_precision"
@json_schema_type
 class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_recall"] = "context_recall"
@json_schema_type
 class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_relevancy"] = "context_relevancy"
@json_schema_type
 class LLMAsJudgeScoringFnParams(BaseModel):
    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
    judge_model: str
    prompt_template: Optional[str] = None
@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
        description="Regexes to extract the answer from generated response",
        default_factory=list,
    )
-    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
+
-        description="Aggregation functions to apply to the scores of each row",
+@json_schema_type
-        default_factory=list,
+class RegexParserScoringFnParams(BaseModel):
-    )
+    type: Literal["regex_parser"] = "regex_parser"
    regex_parser: RegexParserScoringFnParamsFields
@json_schema_type
 class RegexParserMathScoringFnParams(BaseModel):
    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
    regex_parser_math_response: RegexParserScoringFnParamsFields
@json_schema_type
 class EqualityScoringFnParams(BaseModel):
    type: Literal["equality"] = "equality"
    equality: BasicScoringFnParamsFields
@json_schema_type
 class SubsetOfcoringFnParams(BaseModel):
    type: Literal["subset_of"] = "subset_of"
    subset_of: BasicScoringFnParamsFields
@json_schema_type
 class FactualityScoringFnParams(BaseModel):
    type: Literal["factuality"] = "factuality"
    factuality: BasicScoringFnParamsFields
@json_schema_type
 class FaithfulnessScoringFnParams(BaseModel):
    type: Literal["faithfulness"] = "faithfulness"
    faithfulness: BasicScoringFnParamsFields
@json_schema_type
 class AnswerCorrectnessScoringFnParams(BaseModel):
    type: Literal["answer_correctness"] = "answer_correctness"
    answer_correctness: BasicScoringFnParamsFields
@json_schema_type
 class AnswerRelevancyScoringFnParams(BaseModel):
    type: Literal["answer_relevancy"] = "answer_relevancy"
    answer_relevancy: BasicScoringFnParamsFields
@json_schema_type
 class AnswerSimilarityScoringFnParams(BaseModel):
    type: Literal["answer_similarity"] = "answer_similarity"
    answer_similarity: BasicScoringFnParamsFields
@json_schema_type
 class ContextEntityRecallScoringFnParams(BaseModel):
    type: Literal["context_entity_recall"] = "context_entity_recall"
    context_entity_recall: BasicScoringFnParamsFields
@json_schema_type
 class ContextPrecisionScoringFnParams(BaseModel):
    type: Literal["context_precision"] = "context_precision"
    context_precision: BasicScoringFnParamsFields
@json_schema_type
 class ContextRecallScoringFnParams(BaseModel):
    type: Literal["context_recall"] = "context_recall"
    context_recall: BasicScoringFnParamsFields
@json_schema_type
 class ContextRelevancyScoringFnParams(BaseModel):
    type: Literal["context_relevancy"] = "context_relevancy"
    context_relevancy: BasicScoringFnParamsFields
@json_schema_type
 class CustomLLMAsJudgeScoringFnParams(BaseModel):
    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
    custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields
 ScoringFnParams = register_schema(
    Annotated[
        Union[
-            LLMAsJudgeScoringFnParams,
+            CustomLLMAsJudgeScoringFnParams,
            RegexParserScoringFnParams,
            RegexParserMathScoringFnParams,
            EqualityScoringFnParams,