better params fields

2026-01-05 10:12:29 +00:00 · 2025-03-12 21:31:22 -07:00 · 2025-03-12 21:31:22 -07:00 · a7abe6df74
commit a7abe6df74
parent 93c131ed5f
3 changed files with 763 additions and 571 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -6388,90 +6388,114 @@
            "AnswerCorrectnessScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "answer_correctness",
                        "default": "answer_correctness"
                    },
                    "answer_correctness": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "answer_correctness"
                ],
                "title": "AnswerCorrectnessScoringFnParams"
            },
            "AnswerRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "answer_relevancy",
                        "default": "answer_relevancy"
                    },
                    "answer_relevancy": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "answer_relevancy"
                ],
                "title": "AnswerRelevancyScoringFnParams"
            },
            "AnswerSimilarityScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "answer_similarity",
                        "default": "answer_similarity"
                    },
                    "answer_similarity": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "answer_similarity"
                ],
                "title": "AnswerSimilarityScoringFnParams"
            },
@ -6505,150 +6529,234 @@
            "ContextEntityRecallScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "context_entity_recall",
                        "default": "context_entity_recall"
                    },
                    "context_entity_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_entity_recall"
                ],
                "title": "ContextEntityRecallScoringFnParams"
            },
            "ContextPrecisionScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "context_precision",
                        "default": "context_precision"
                    },
                    "context_precision": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_precision"
                ],
                "title": "ContextPrecisionScoringFnParams"
            },
            "ContextRecallScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "context_recall",
                        "default": "context_recall"
                    },
                    "context_recall": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_recall"
                ],
                "title": "ContextRecallScoringFnParams"
            },
            "ContextRelevancyScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "context_relevancy",
                        "default": "context_relevancy"
                    },
                    "context_relevancy": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "context_relevancy"
                ],
                "title": "ContextRelevancyScoringFnParams"
            },
            "CustomLLMAsJudgeScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "custom_llm_as_judge",
                        "default": "custom_llm_as_judge"
                    },
                    "custom_llm_as_judge": {
                        "type": "object",
                        "properties": {
                            "type": {
                                "type": "string",
                                "const": "custom_llm_as_judge",
                                "default": "custom_llm_as_judge"
                            },
                            "judge_model": {
                                "type": "string"
                            },
                            "prompt_template": {
                                "type": "string"
                            },
                            "judge_score_regexes": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                }
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "type",
                            "judge_model"
                        ],
                        "title": "CustomLLMAsJudgeScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "custom_llm_as_judge"
                ],
                "title": "CustomLLMAsJudgeScoringFnParams"
            },
            "EqualityScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "equality",
                        "default": "equality"
                    },
                    "equality": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "equality"
                ],
                "title": "EqualityScoringFnParams"
            },
@ -6672,104 +6780,78 @@
            "FactualityScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "factuality",
                        "default": "factuality"
                    },
                    "factuality": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "factuality"
                ],
                "title": "FactualityScoringFnParams"
            },
            "FaithfulnessScoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "faithfulness",
                        "default": "faithfulness"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
                "title": "FaithfulnessScoringFnParams"
            },
            "LLMAsJudgeScoringFnParams": {
                "type": "object",
                "properties": {
                    "type": {
                        "type": "string",
                        "const": "custom_llm_as_judge",
                        "default": "custom_llm_as_judge"
                    },
-                    "judge_model": {
+                    "faithfulness": {
-                        "type": "string"
+                        "type": "object",
-                    },
+                        "properties": {
-                    "prompt_template": {
+                            "aggregation_functions": {
-                        "type": "string"
+                                "type": "array",
-                    },
+                                "items": {
-                    "judge_score_regexes": {
+                                    "type": "string",
-                        "type": "array",
+                                    "enum": [
-                        "items": {
+                                        "average",
-                            "type": "string"
+                                        "median",
-                        }
+                                        "categorical_count",
-                    },
+                                        "accuracy"
-                    "aggregation_functions": {
+                                    ],
-                        "type": "array",
+                                    "title": "AggregationFunctionType",
-                        "items": {
+                                    "description": "A type of aggregation function."
-                            "type": "string",
+                                },
-                            "enum": [
+                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
-                                "average",
+                            }
-                                "median",
+                        },
-                                "categorical_count",
+                        "additionalProperties": false,
-                                "accuracy"
+                        "title": "BasicScoringFnParamsFields"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        }
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
-                    "judge_model"
+                    "faithfulness"
                ],
-                "title": "LLMAsJudgeScoringFnParams"
+                "title": "FaithfulnessScoringFnParams"
            },
            "ModelCandidate": {
                "type": "object",
@ -6804,83 +6886,103 @@
            "RegexParserMathScoringFnParams": {
                "type": "object",
                "properties": {
                    "parsing_regexes": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        },
                        "description": "(Optional) Regexes to extract the answer from generated response."
                    },
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "regex_parser_math_response",
                        "default": "regex_parser_math_response"
                    },
                    "regex_parser_math_response": {
                        "type": "object",
                        "properties": {
                            "parsing_regexes": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "(Optional) Regexes to extract the answer from generated response."
                            },
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "parsing_regexes"
                        ],
                        "title": "RegexParserScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "parsing_regexes",
+                    "type",
-                    "type"
+                    "regex_parser_math_response"
                ],
                "title": "RegexParserMathScoringFnParams"
            },
            "RegexParserScoringFnParams": {
                "type": "object",
                "properties": {
                    "parsing_regexes": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        },
                        "description": "(Optional) Regexes to extract the answer from generated response."
                    },
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "regex_parser",
                        "default": "regex_parser"
                    },
                    "regex_parser": {
                        "type": "object",
                        "properties": {
                            "parsing_regexes": {
                                "type": "array",
                                "items": {
                                    "type": "string"
                                },
                                "description": "(Optional) Regexes to extract the answer from generated response."
                            },
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "parsing_regexes"
                        ],
                        "title": "RegexParserScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "parsing_regexes",
+                    "type",
-                    "type"
+                    "regex_parser"
                ],
                "title": "RegexParserScoringFnParams"
            },
            "ScoringFnParams": {
                "oneOf": [
                    {
-                        "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                        "$ref": "#/components/schemas/CustomLLMAsJudgeScoringFnParams"
                    },
                    {
                        "$ref": "#/components/schemas/RegexParserScoringFnParams"
@ -6925,7 +7027,7 @@
                "discriminator": {
                    "propertyName": "type",
                    "mapping": {
-                        "custom_llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
+                        "custom_llm_as_judge": "#/components/schemas/CustomLLMAsJudgeScoringFnParams",
                        "regex_parser": "#/components/schemas/RegexParserScoringFnParams",
                        "regex_parser_math_response": "#/components/schemas/RegexParserMathScoringFnParams",
                        "equality": "#/components/schemas/EqualityScoringFnParams",
@ -6945,30 +7047,38 @@
            "SubsetOfcoringFnParams": {
                "type": "object",
                "properties": {
                    "aggregation_functions": {
                        "type": "array",
                        "items": {
                            "type": "string",
                            "enum": [
                                "average",
                                "median",
                                "categorical_count",
                                "accuracy"
                            ],
                            "title": "AggregationFunctionType",
                            "description": "A type of aggregation function."
                        },
                        "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                    },
                    "type": {
                        "type": "string",
                        "const": "subset_of",
                        "default": "subset_of"
                    },
                    "subset_of": {
                        "type": "object",
                        "properties": {
                            "aggregation_functions": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "average",
                                        "median",
                                        "categorical_count",
                                        "accuracy"
                                    ],
                                    "title": "AggregationFunctionType",
                                    "description": "A type of aggregation function."
                                },
                                "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
                            }
                        },
                        "additionalProperties": false,
                        "title": "BasicScoringFnParamsFields"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "type"
+                    "type",
                    "subset_of"
                ],
                "title": "SubsetOfcoringFnParams"
            },
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -4451,77 +4451,95 @@ components:
    AnswerCorrectnessScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_correctness
          default: answer_correctness
        answer_correctness:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_correctness
      title: AnswerCorrectnessScoringFnParams
    AnswerRelevancyScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_relevancy
          default: answer_relevancy
        answer_relevancy:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_relevancy
      title: AnswerRelevancyScoringFnParams
    AnswerSimilarityScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: answer_similarity
          default: answer_similarity
        answer_similarity:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - answer_similarity
      title: AnswerSimilarityScoringFnParams
    BenchmarkConfig:
      type: object
@ -4551,127 +4569,189 @@ components:
    ContextEntityRecallScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_entity_recall
          default: context_entity_recall
        context_entity_recall:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_entity_recall
      title: ContextEntityRecallScoringFnParams
    ContextPrecisionScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_precision
          default: context_precision
        context_precision:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_precision
      title: ContextPrecisionScoringFnParams
    ContextRecallScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_recall
          default: context_recall
        context_recall:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_recall
      title: ContextRecallScoringFnParams
    ContextRelevancyScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: context_relevancy
          default: context_relevancy
        context_relevancy:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - context_relevancy
      title: ContextRelevancyScoringFnParams
    CustomLLMAsJudgeScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: custom_llm_as_judge
          default: custom_llm_as_judge
        custom_llm_as_judge:
          type: object
          properties:
            type:
              type: string
              const: custom_llm_as_judge
              default: custom_llm_as_judge
            judge_model:
              type: string
            prompt_template:
              type: string
            judge_score_regexes:
              type: array
              items:
                type: string
          additionalProperties: false
          required:
            - type
            - judge_model
          title: CustomLLMAsJudgeScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - custom_llm_as_judge
      title: CustomLLMAsJudgeScoringFnParams
    EqualityScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: equality
          default: equality
        equality:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - equality
      title: EqualityScoringFnParams
    EvalCandidate:
      oneOf:
@ -4685,84 +4765,65 @@ components:
    FactualityScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: factuality
          default: factuality
        factuality:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - factuality
      title: FactualityScoringFnParams
    FaithfulnessScoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: faithfulness
          default: faithfulness
        faithfulness:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - faithfulness
      title: FaithfulnessScoringFnParams
    LLMAsJudgeScoringFnParams:
      type: object
      properties:
        type:
          type: string
          const: custom_llm_as_judge
          default: custom_llm_as_judge
        judge_model:
          type: string
        prompt_template:
          type: string
        judge_score_regexes:
          type: array
          items:
            type: string
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
      additionalProperties: false
      required:
        - type
        - judge_model
      title: LLMAsJudgeScoringFnParams
    ModelCandidate:
      type: object
      properties:
@ -4791,70 +4852,84 @@ components:
    RegexParserMathScoringFnParams:
      type: object
      properties:
        parsing_regexes:
          type: array
          items:
            type: string
          description: >-
            (Optional) Regexes to extract the answer from generated response.
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: regex_parser_math_response
          default: regex_parser_math_response
        regex_parser_math_response:
          type: object
          properties:
            parsing_regexes:
              type: array
              items:
                type: string
              description: >-
                (Optional) Regexes to extract the answer from generated response.
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          required:
            - parsing_regexes
          title: RegexParserScoringFnParamsFields
      additionalProperties: false
      required:
        - parsing_regexes
        - type
        - regex_parser_math_response
      title: RegexParserMathScoringFnParams
    RegexParserScoringFnParams:
      type: object
      properties:
        parsing_regexes:
          type: array
          items:
            type: string
          description: >-
            (Optional) Regexes to extract the answer from generated response.
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: regex_parser
          default: regex_parser
        regex_parser:
          type: object
          properties:
            parsing_regexes:
              type: array
              items:
                type: string
              description: >-
                (Optional) Regexes to extract the answer from generated response.
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          required:
            - parsing_regexes
          title: RegexParserScoringFnParamsFields
      additionalProperties: false
      required:
        - parsing_regexes
        - type
        - regex_parser
      title: RegexParserScoringFnParams
    ScoringFnParams:
      oneOf:
-        - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+        - $ref: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
        - $ref: '#/components/schemas/RegexParserScoringFnParams'
        - $ref: '#/components/schemas/RegexParserMathScoringFnParams'
        - $ref: '#/components/schemas/EqualityScoringFnParams'
@ -4871,7 +4946,7 @@ components:
      discriminator:
        propertyName: type
        mapping:
-          custom_llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+          custom_llm_as_judge: '#/components/schemas/CustomLLMAsJudgeScoringFnParams'
          regex_parser: '#/components/schemas/RegexParserScoringFnParams'
          regex_parser_math_response: '#/components/schemas/RegexParserMathScoringFnParams'
          equality: '#/components/schemas/EqualityScoringFnParams'
@ -4888,27 +4963,33 @@ components:
    SubsetOfcoringFnParams:
      type: object
      properties:
        aggregation_functions:
          type: array
          items:
            type: string
            enum:
              - average
              - median
              - categorical_count
              - accuracy
            title: AggregationFunctionType
            description: A type of aggregation function.
          description: >-
            (Optional) Aggregation functions to apply to the scores of each row. If
            not provided, no aggregation will be performed.
        type:
          type: string
          const: subset_of
          default: subset_of
        subset_of:
          type: object
          properties:
            aggregation_functions:
              type: array
              items:
                type: string
                enum:
                  - average
                  - median
                  - categorical_count
                  - accuracy
                title: AggregationFunctionType
                description: A type of aggregation function.
              description: >-
                (Optional) Aggregation functions to apply to the scores of each row.
                If not provided, no aggregation will be performed.
          additionalProperties: false
          title: BasicScoringFnParamsFields
      additionalProperties: false
      required:
        - type
        - subset_of
      title: SubsetOfcoringFnParams
    EvaluateRowsRequest:
      type: object
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@ -67,7 +67,7 @@ class AggregationFunctionType(Enum):
    accuracy = "accuracy"
-class BasicScoringFnParamsCommon(BaseModel):
+class BasicScoringFnParamsFields(BaseModel):
    """
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
    """
@ -78,7 +78,7 @@ class BasicScoringFnParamsCommon(BaseModel):
    )
-class RegexParserScoringFnParamsCommon(BaseModel):
+class RegexParserScoringFnParamsFields(BaseModel):
    """
    :param parsing_regexes: (Optional) Regexes to extract the answer from generated response.
    :param aggregation_functions: (Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed.
@ -93,74 +93,7 @@ class RegexParserScoringFnParamsCommon(BaseModel):
        default_factory=list,
    )
-
+class CustomLLMAsJudgeScoringFnParamsFields(BaseModel):
@json_schema_type
 class RegexParserScoringFnParams(RegexParserScoringFnParamsCommon):
    type: Literal["regex_parser"] = "regex_parser"
@json_schema_type
 class RegexParserMathScoringFnParams(RegexParserScoringFnParamsCommon):
    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
@json_schema_type
 class EqualityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["equality"] = "equality"
@json_schema_type
 class SubsetOfcoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["subset_of"] = "subset_of"
@json_schema_type
 class FactualityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["factuality"] = "factuality"
@json_schema_type
 class FaithfulnessScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["faithfulness"] = "faithfulness"
@json_schema_type
 class AnswerCorrectnessScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_correctness"] = "answer_correctness"
@json_schema_type
 class AnswerRelevancyScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_relevancy"] = "answer_relevancy"
@json_schema_type
 class AnswerSimilarityScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["answer_similarity"] = "answer_similarity"
@json_schema_type
 class ContextEntityRecallScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_entity_recall"] = "context_entity_recall"
@json_schema_type
 class ContextPrecisionScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_precision"] = "context_precision"
@json_schema_type
 class ContextRecallScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_recall"] = "context_recall"
@json_schema_type
 class ContextRelevancyScoringFnParams(BasicScoringFnParamsCommon):
    type: Literal["context_relevancy"] = "context_relevancy"
@json_schema_type
 class LLMAsJudgeScoringFnParams(BaseModel):
    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
    judge_model: str
    prompt_template: Optional[str] = None
@ -168,16 +101,84 @@ class LLMAsJudgeScoringFnParams(BaseModel):
        description="Regexes to extract the answer from generated response",
        default_factory=list,
    )
-    aggregation_functions: Optional[List[AggregationFunctionType]] = Field(
+
-        description="Aggregation functions to apply to the scores of each row",
+@json_schema_type
-        default_factory=list,
+class RegexParserScoringFnParams(BaseModel):
-    )
+    type: Literal["regex_parser"] = "regex_parser"
    regex_parser: RegexParserScoringFnParamsFields
@json_schema_type
 class RegexParserMathScoringFnParams(BaseModel):
    type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
    regex_parser_math_response: RegexParserScoringFnParamsFields
@json_schema_type
 class EqualityScoringFnParams(BaseModel):
    type: Literal["equality"] = "equality"
    equality: BasicScoringFnParamsFields
@json_schema_type
 class SubsetOfcoringFnParams(BaseModel):
    type: Literal["subset_of"] = "subset_of"
    subset_of: BasicScoringFnParamsFields
@json_schema_type
 class FactualityScoringFnParams(BaseModel):
    type: Literal["factuality"] = "factuality"
    factuality: BasicScoringFnParamsFields
@json_schema_type
 class FaithfulnessScoringFnParams(BaseModel):
    type: Literal["faithfulness"] = "faithfulness"
    faithfulness: BasicScoringFnParamsFields
@json_schema_type
 class AnswerCorrectnessScoringFnParams(BaseModel):
    type: Literal["answer_correctness"] = "answer_correctness"
    answer_correctness: BasicScoringFnParamsFields
@json_schema_type
 class AnswerRelevancyScoringFnParams(BaseModel):
    type: Literal["answer_relevancy"] = "answer_relevancy"
    answer_relevancy: BasicScoringFnParamsFields
@json_schema_type
 class AnswerSimilarityScoringFnParams(BaseModel):
    type: Literal["answer_similarity"] = "answer_similarity"
    answer_similarity: BasicScoringFnParamsFields
@json_schema_type
 class ContextEntityRecallScoringFnParams(BaseModel):
    type: Literal["context_entity_recall"] = "context_entity_recall"
    context_entity_recall: BasicScoringFnParamsFields
@json_schema_type
 class ContextPrecisionScoringFnParams(BaseModel):
    type: Literal["context_precision"] = "context_precision"
    context_precision: BasicScoringFnParamsFields
@json_schema_type
 class ContextRecallScoringFnParams(BaseModel):
    type: Literal["context_recall"] = "context_recall"
    context_recall: BasicScoringFnParamsFields
@json_schema_type
 class ContextRelevancyScoringFnParams(BaseModel):
    type: Literal["context_relevancy"] = "context_relevancy"
    context_relevancy: BasicScoringFnParamsFields
@json_schema_type
 class CustomLLMAsJudgeScoringFnParams(BaseModel):
    type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
    custom_llm_as_judge: CustomLLMAsJudgeScoringFnParamsFields
 ScoringFnParams = register_schema(
    Annotated[
        Union[
-            LLMAsJudgeScoringFnParams,
+            CustomLLMAsJudgeScoringFnParams,
            RegexParserScoringFnParams,
            RegexParserMathScoringFnParams,
            EqualityScoringFnParams,