This commit is contained in:
Xi Yan 2025-03-12 21:23:35 -07:00
parent 20cdcd87a3
commit 93c131ed5f
2 changed files with 501 additions and 114 deletions

View file

@ -1401,11 +1401,46 @@
"tags": [ "tags": [
"ScoringFunctions" "ScoringFunctions"
], ],
"description": "", "description": "Get a scoring function by its ID.",
"parameters": [ "parameters": [
{ {
"name": "scoring_fn_id", "name": "scoring_fn_id",
"in": "path", "in": "path",
"description": "The ID of the scoring function to get.",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"ScoringFunctions"
],
"description": "Unregister a scoring function by its ID.",
"parameters": [
{
"name": "scoring_fn_id",
"in": "path",
"description": "The ID of the scoring function to unregister.",
"required": true, "required": true,
"schema": { "schema": {
"type": "string" "type": "string"
@ -2772,6 +2807,39 @@
] ]
} }
}, },
"/v1/scoring-functions/types": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListScoringFunctionTypesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"ScoringFunctions"
],
"description": "List information about all available scoring function types and how to use them.",
"parameters": []
}
},
"/v1/scoring-functions": { "/v1/scoring-functions": {
"get": { "get": {
"responses": { "responses": {
@ -2801,7 +2869,7 @@
"tags": [ "tags": [
"ScoringFunctions" "ScoringFunctions"
], ],
"description": "", "description": "List all registered scoring functions.",
"parameters": [] "parameters": []
}, },
"post": { "post": {
@ -6317,24 +6385,21 @@
"title": "AgentCandidate", "title": "AgentCandidate",
"description": "An agent candidate for evaluation." "description": "An agent candidate for evaluation."
}, },
"AggregationFunctionType": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
},
"AnswerCorrectnessScoringFnParams": { "AnswerCorrectnessScoringFnParams": {
"type": "object", "type": "object",
"properties": { "properties": {
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6356,7 +6421,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6378,7 +6451,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6427,7 +6508,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6449,7 +6538,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6471,7 +6568,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6493,7 +6598,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6515,7 +6628,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6554,7 +6675,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6576,7 +6705,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6615,7 +6752,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
} }
} }
}, },
@ -6669,7 +6814,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6699,7 +6852,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -6787,7 +6948,15 @@
"aggregation_functions": { "aggregation_functions": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AggregationFunctionType" "type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}, },
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed." "description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
}, },
@ -7078,12 +7247,11 @@
"type": "string", "type": "string",
"description": "The ID of the dataset to use to run the benchmark." "description": "The ID of the dataset to use to run the benchmark."
}, },
"scoring_functions": { "scoring_fn_ids": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/ScoringFnParams" "type": "string"
}, }
"description": "The scoring functions with parameters to use for this benchmark."
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -7119,7 +7287,7 @@
"provider_id", "provider_id",
"type", "type",
"dataset_id", "dataset_id",
"scoring_functions", "scoring_fn_ids",
"metadata" "metadata"
], ],
"title": "Benchmark" "title": "Benchmark"
@ -7459,29 +7627,9 @@
"const": "scoring_function", "const": "scoring_function",
"default": "scoring_function" "default": "scoring_function"
}, },
"scoring_fn_type": { "fn": {
"type": "string",
"enum": [
"custom_llm_as_judge",
"regex_parser",
"regex_parser_math_response",
"equality",
"subset_of",
"factuality",
"faithfulness",
"answer_correctness",
"answer_relevancy",
"answer_similarity",
"context_entity_recall",
"context_precision",
"context_recall",
"context_relevancy"
],
"description": "The type of scoring function."
},
"params": {
"$ref": "#/components/schemas/ScoringFnParams", "$ref": "#/components/schemas/ScoringFnParams",
"description": "(Optional) The parameters for the scoring function." "description": "The scoring function type and parameters."
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -7516,7 +7664,7 @@
"provider_resource_id", "provider_resource_id",
"provider_id", "provider_id",
"type", "type",
"scoring_fn_type", "fn",
"metadata" "metadata"
], ],
"title": "ScoringFn" "title": "ScoringFn"
@ -8499,6 +8647,71 @@
], ],
"title": "ListRoutesResponse" "title": "ListRoutesResponse"
}, },
"ScoringFnTypeInfo": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"custom_llm_as_judge",
"regex_parser",
"regex_parser_math_response",
"equality",
"subset_of",
"factuality",
"faithfulness",
"answer_correctness",
"answer_relevancy",
"answer_similarity",
"context_entity_recall",
"context_precision",
"context_recall",
"context_relevancy"
],
"description": "The type of scoring function."
},
"description": {
"type": "string",
"description": "A description of the scoring function type, e.g. \"Write your custom judge prompt to score the answer.\""
},
"supported_purposes": {
"type": "array",
"items": {
"type": "string",
"enum": [
"post-training/messages",
"eval/question-answer"
],
"title": "DatasetPurpose",
"description": "Purpose of the dataset. Each type has a different column format."
},
"description": "The purposes that this scoring function can be used for."
}
},
"additionalProperties": false,
"required": [
"type",
"description",
"supported_purposes"
],
"title": "ScoringFnTypeInfo"
},
"ListScoringFunctionTypesResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ScoringFnTypeInfo"
}
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListScoringFunctionTypesResponse"
},
"ListScoringFunctionsResponse": { "ListScoringFunctionsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9509,12 +9722,12 @@
"type": "string", "type": "string",
"description": "The ID of the dataset to use to run the benchmark." "description": "The ID of the dataset to use to run the benchmark."
}, },
"scoring_functions": { "scoring_fn_ids": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/ScoringFnParams" "type": "string"
}, },
"description": "The scoring functions with parameters to use for this benchmark." "description": "List of scoring function IDs to use for this benchmark."
}, },
"benchmark_id": { "benchmark_id": {
"type": "string", "type": "string",
@ -9550,7 +9763,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"dataset_id", "dataset_id",
"scoring_functions" "scoring_fn_ids"
], ],
"title": "RegisterBenchmarkRequest" "title": "RegisterBenchmarkRequest"
}, },

View file

@ -952,10 +952,36 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- ScoringFunctions - ScoringFunctions
description: '' description: Get a scoring function by its ID.
parameters: parameters:
- name: scoring_fn_id - name: scoring_fn_id
in: path in: path
description: The ID of the scoring function to get.
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: Unregister a scoring function by its ID.
parameters:
- name: scoring_fn_id
in: path
description: >-
The ID of the scoring function to unregister.
required: true required: true
schema: schema:
type: string type: string
@ -1869,6 +1895,30 @@ paths:
required: false required: false
schema: schema:
$ref: '#/components/schemas/URL' $ref: '#/components/schemas/URL'
/v1/scoring-functions/types:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListScoringFunctionTypesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: >-
List information about all available scoring function types and how to use them.
parameters: []
/v1/scoring-functions: /v1/scoring-functions:
get: get:
responses: responses:
@ -1890,7 +1940,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- ScoringFunctions - ScoringFunctions
description: '' description: List all registered scoring functions.
parameters: [] parameters: []
post: post:
responses: responses:
@ -4398,22 +4448,20 @@ components:
- config - config
title: AgentCandidate title: AgentCandidate
description: An agent candidate for evaluation. description: An agent candidate for evaluation.
AggregationFunctionType:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
AnswerCorrectnessScoringFnParams: AnswerCorrectnessScoringFnParams:
type: object type: object
properties: properties:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4431,7 +4479,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4449,7 +4504,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4492,7 +4554,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4510,7 +4579,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4528,7 +4604,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4546,7 +4629,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4564,7 +4654,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4591,7 +4688,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4609,7 +4713,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4639,7 +4750,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false additionalProperties: false
required: required:
- type - type
@ -4682,7 +4800,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4707,7 +4832,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4759,7 +4891,14 @@ components:
aggregation_functions: aggregation_functions:
type: array type: array
items: items:
$ref: '#/components/schemas/AggregationFunctionType' type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
description: >- description: >-
(Optional) Aggregation functions to apply to the scores of each row. If (Optional) Aggregation functions to apply to the scores of each row. If
not provided, no aggregation will be performed. not provided, no aggregation will be performed.
@ -4939,12 +5078,10 @@ components:
type: string type: string
description: >- description: >-
The ID of the dataset to use to run the benchmark. The ID of the dataset to use to run the benchmark.
scoring_functions: scoring_fn_ids:
type: array type: array
items: items:
$ref: '#/components/schemas/ScoringFnParams' type: string
description: >-
The scoring functions with parameters to use for this benchmark.
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -4964,7 +5101,7 @@ components:
- provider_id - provider_id
- type - type
- dataset_id - dataset_id
- scoring_functions - scoring_fn_ids
- metadata - metadata
title: Benchmark title: Benchmark
DataSource: DataSource:
@ -5180,28 +5317,10 @@ components:
type: string type: string
const: scoring_function const: scoring_function
default: scoring_function default: scoring_function
scoring_fn_type: fn:
type: string
enum:
- custom_llm_as_judge
- regex_parser
- regex_parser_math_response
- equality
- subset_of
- factuality
- faithfulness
- answer_correctness
- answer_relevancy
- answer_similarity
- context_entity_recall
- context_precision
- context_recall
- context_relevancy
description: The type of scoring function.
params:
$ref: '#/components/schemas/ScoringFnParams' $ref: '#/components/schemas/ScoringFnParams'
description: >- description: >-
(Optional) The parameters for the scoring function. The scoring function type and parameters.
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -5220,7 +5339,7 @@ components:
- provider_resource_id - provider_resource_id
- provider_id - provider_id
- type - type
- scoring_fn_type - fn
- metadata - metadata
title: ScoringFn title: ScoringFn
Shield: Shield:
@ -5842,6 +5961,61 @@ components:
required: required:
- data - data
title: ListRoutesResponse title: ListRoutesResponse
ScoringFnTypeInfo:
type: object
properties:
type:
type: string
enum:
- custom_llm_as_judge
- regex_parser
- regex_parser_math_response
- equality
- subset_of
- factuality
- faithfulness
- answer_correctness
- answer_relevancy
- answer_similarity
- context_entity_recall
- context_precision
- context_recall
- context_relevancy
description: The type of scoring function.
description:
type: string
description: >-
A description of the scoring function type, e.g. "Write your custom judge
prompt to score the answer."
supported_purposes:
type: array
items:
type: string
enum:
- post-training/messages
- eval/question-answer
title: DatasetPurpose
description: >-
Purpose of the dataset. Each type has a different column format.
description: >-
The purposes that this scoring function can be used for.
additionalProperties: false
required:
- type
- description
- supported_purposes
title: ScoringFnTypeInfo
ListScoringFunctionTypesResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/ScoringFnTypeInfo'
additionalProperties: false
required:
- data
title: ListScoringFunctionTypesResponse
ListScoringFunctionsResponse: ListScoringFunctionsResponse:
type: object type: object
properties: properties:
@ -6491,12 +6665,12 @@ components:
type: string type: string
description: >- description: >-
The ID of the dataset to use to run the benchmark. The ID of the dataset to use to run the benchmark.
scoring_functions: scoring_fn_ids:
type: array type: array
items: items:
$ref: '#/components/schemas/ScoringFnParams' type: string
description: >- description: >-
The scoring functions with parameters to use for this benchmark. List of scoring function IDs to use for this benchmark.
benchmark_id: benchmark_id:
type: string type: string
description: >- description: >-
@ -6517,7 +6691,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- dataset_id - dataset_id
- scoring_functions - scoring_fn_ids
title: RegisterBenchmarkRequest title: RegisterBenchmarkRequest
RegisterDatasetRequest: RegisterDatasetRequest:
type: object type: object