mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 19:12:09 +00:00
purpose
This commit is contained in:
parent
20cdcd87a3
commit
93c131ed5f
2 changed files with 501 additions and 114 deletions
331
docs/_static/llama-stack-spec.html
vendored
331
docs/_static/llama-stack-spec.html
vendored
|
@ -1401,11 +1401,46 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"ScoringFunctions"
|
"ScoringFunctions"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "Get a scoring function by its ID.",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
{
|
{
|
||||||
"name": "scoring_fn_id",
|
"name": "scoring_fn_id",
|
||||||
"in": "path",
|
"in": "path",
|
||||||
|
"description": "The ID of the scoring function to get.",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"delete": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"ScoringFunctions"
|
||||||
|
],
|
||||||
|
"description": "Unregister a scoring function by its ID.",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "scoring_fn_id",
|
||||||
|
"in": "path",
|
||||||
|
"description": "The ID of the scoring function to unregister.",
|
||||||
"required": true,
|
"required": true,
|
||||||
"schema": {
|
"schema": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -2772,6 +2807,39 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/scoring-functions/types": {
|
||||||
|
"get": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ListScoringFunctionTypesResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"ScoringFunctions"
|
||||||
|
],
|
||||||
|
"description": "List all available scoring function types, with information on how to use them.",
|
||||||
|
"parameters": []
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/scoring-functions": {
|
"/v1/scoring-functions": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -2801,7 +2869,7 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"ScoringFunctions"
|
"ScoringFunctions"
|
||||||
],
|
],
|
||||||
"description": "",
|
"description": "List all registered scoring functions.",
|
||||||
"parameters": []
|
"parameters": []
|
||||||
},
|
},
|
||||||
"post": {
|
"post": {
|
||||||
|
@ -6317,24 +6385,21 @@
|
||||||
"title": "AgentCandidate",
|
"title": "AgentCandidate",
|
||||||
"description": "An agent candidate for evaluation."
|
"description": "An agent candidate for evaluation."
|
||||||
},
|
},
|
||||||
"AggregationFunctionType": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
},
|
|
||||||
"AnswerCorrectnessScoringFnParams": {
|
"AnswerCorrectnessScoringFnParams": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6356,7 +6421,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6378,7 +6451,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6427,7 +6508,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6449,7 +6538,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6471,7 +6568,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6493,7 +6598,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6515,7 +6628,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6554,7 +6675,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6576,7 +6705,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6615,7 +6752,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -6669,7 +6814,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6699,7 +6852,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -6787,7 +6948,15 @@
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"average",
|
||||||
|
"median",
|
||||||
|
"categorical_count",
|
||||||
|
"accuracy"
|
||||||
|
],
|
||||||
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "A type of aggregation function."
|
||||||
},
|
},
|
||||||
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
"description": "(Optional) Aggregation functions to apply to the scores of each row. If not provided, no aggregation will be performed."
|
||||||
},
|
},
|
||||||
|
@ -7078,12 +7247,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The ID of the dataset used to run the benchmark."
|
"description": "The ID of the dataset used to run the benchmark."
|
||||||
},
|
},
|
||||||
"scoring_functions": {
|
"scoring_fn_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParams"
|
"type": "string"
|
||||||
},
|
}
|
||||||
"description": "The scoring functions with parameters to use for this benchmark."
|
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -7119,7 +7287,7 @@
|
||||||
"provider_id",
|
"provider_id",
|
||||||
"type",
|
"type",
|
||||||
"dataset_id",
|
"dataset_id",
|
||||||
"scoring_functions",
|
"scoring_fn_ids",
|
||||||
"metadata"
|
"metadata"
|
||||||
],
|
],
|
||||||
"title": "Benchmark"
|
"title": "Benchmark"
|
||||||
|
@ -7459,29 +7627,9 @@
|
||||||
"const": "scoring_function",
|
"const": "scoring_function",
|
||||||
"default": "scoring_function"
|
"default": "scoring_function"
|
||||||
},
|
},
|
||||||
"scoring_fn_type": {
|
"fn": {
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"custom_llm_as_judge",
|
|
||||||
"regex_parser",
|
|
||||||
"regex_parser_math_response",
|
|
||||||
"equality",
|
|
||||||
"subset_of",
|
|
||||||
"factuality",
|
|
||||||
"faithfulness",
|
|
||||||
"answer_correctness",
|
|
||||||
"answer_relevancy",
|
|
||||||
"answer_similarity",
|
|
||||||
"context_entity_recall",
|
|
||||||
"context_precision",
|
|
||||||
"context_recall",
|
|
||||||
"context_relevancy"
|
|
||||||
],
|
|
||||||
"description": "The type of scoring function."
|
|
||||||
},
|
|
||||||
"params": {
|
|
||||||
"$ref": "#/components/schemas/ScoringFnParams",
|
"$ref": "#/components/schemas/ScoringFnParams",
|
||||||
"description": "(Optional) The parameters for the scoring function."
|
"description": "The scoring function type and parameters."
|
||||||
},
|
},
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -7516,7 +7664,7 @@
|
||||||
"provider_resource_id",
|
"provider_resource_id",
|
||||||
"provider_id",
|
"provider_id",
|
||||||
"type",
|
"type",
|
||||||
"scoring_fn_type",
|
"fn",
|
||||||
"metadata"
|
"metadata"
|
||||||
],
|
],
|
||||||
"title": "ScoringFn"
|
"title": "ScoringFn"
|
||||||
|
@ -8499,6 +8647,71 @@
|
||||||
],
|
],
|
||||||
"title": "ListRoutesResponse"
|
"title": "ListRoutesResponse"
|
||||||
},
|
},
|
||||||
|
"ScoringFnTypeInfo": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"custom_llm_as_judge",
|
||||||
|
"regex_parser",
|
||||||
|
"regex_parser_math_response",
|
||||||
|
"equality",
|
||||||
|
"subset_of",
|
||||||
|
"factuality",
|
||||||
|
"faithfulness",
|
||||||
|
"answer_correctness",
|
||||||
|
"answer_relevancy",
|
||||||
|
"answer_similarity",
|
||||||
|
"context_entity_recall",
|
||||||
|
"context_precision",
|
||||||
|
"context_recall",
|
||||||
|
"context_relevancy"
|
||||||
|
],
|
||||||
|
"description": "The type of scoring function."
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "A description of the scoring function type, e.g. 'Write your custom judge prompt to score the answer.'"
|
||||||
|
},
|
||||||
|
"supported_purposes": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"post-training/messages",
|
||||||
|
"eval/question-answer"
|
||||||
|
],
|
||||||
|
"title": "DatasetPurpose",
|
||||||
|
"description": "Purpose of the dataset. Each type has a different column format."
|
||||||
|
},
|
||||||
|
"description": "The purposes that this scoring function can be used for."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"type",
|
||||||
|
"description",
|
||||||
|
"supported_purposes"
|
||||||
|
],
|
||||||
|
"title": "ScoringFnTypeInfo"
|
||||||
|
},
|
||||||
|
"ListScoringFunctionTypesResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/ScoringFnTypeInfo"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"data"
|
||||||
|
],
|
||||||
|
"title": "ListScoringFunctionTypesResponse"
|
||||||
|
},
|
||||||
"ListScoringFunctionsResponse": {
|
"ListScoringFunctionsResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -9509,12 +9722,12 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The ID of the dataset used to run the benchmark."
|
"description": "The ID of the dataset used to run the benchmark."
|
||||||
},
|
},
|
||||||
"scoring_functions": {
|
"scoring_fn_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParams"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"description": "The scoring functions with parameters to use for this benchmark."
|
"description": "List of scoring function ids to use for this benchmark."
|
||||||
},
|
},
|
||||||
"benchmark_id": {
|
"benchmark_id": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -9550,7 +9763,7 @@
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"dataset_id",
|
"dataset_id",
|
||||||
"scoring_functions"
|
"scoring_fn_ids"
|
||||||
],
|
],
|
||||||
"title": "RegisterBenchmarkRequest"
|
"title": "RegisterBenchmarkRequest"
|
||||||
},
|
},
|
||||||
|
|
284
docs/_static/llama-stack-spec.yaml
vendored
284
docs/_static/llama-stack-spec.yaml
vendored
|
@ -952,10 +952,36 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- ScoringFunctions
|
- ScoringFunctions
|
||||||
description: ''
|
description: Get a scoring function by its ID.
|
||||||
parameters:
|
parameters:
|
||||||
- name: scoring_fn_id
|
- name: scoring_fn_id
|
||||||
in: path
|
in: path
|
||||||
|
description: The ID of the scoring function to get.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
delete:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- ScoringFunctions
|
||||||
|
description: Unregister a scoring function by its ID.
|
||||||
|
parameters:
|
||||||
|
- name: scoring_fn_id
|
||||||
|
in: path
|
||||||
|
description: >-
|
||||||
|
The ID of the scoring function to unregister.
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
@ -1869,6 +1895,30 @@ paths:
|
||||||
required: false
|
required: false
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/URL'
|
$ref: '#/components/schemas/URL'
|
||||||
|
/v1/scoring-functions/types:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListScoringFunctionTypesResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- ScoringFunctions
|
||||||
|
description: >-
|
||||||
|
List all available scoring function types, with information on how to use them.
|
||||||
|
parameters: []
|
||||||
/v1/scoring-functions:
|
/v1/scoring-functions:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -1890,7 +1940,7 @@ paths:
|
||||||
$ref: '#/components/responses/DefaultError'
|
$ref: '#/components/responses/DefaultError'
|
||||||
tags:
|
tags:
|
||||||
- ScoringFunctions
|
- ScoringFunctions
|
||||||
description: ''
|
description: List all registered scoring functions.
|
||||||
parameters: []
|
parameters: []
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -4398,22 +4448,20 @@ components:
|
||||||
- config
|
- config
|
||||||
title: AgentCandidate
|
title: AgentCandidate
|
||||||
description: An agent candidate for evaluation.
|
description: An agent candidate for evaluation.
|
||||||
AggregationFunctionType:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
AnswerCorrectnessScoringFnParams:
|
AnswerCorrectnessScoringFnParams:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4431,7 +4479,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4449,7 +4504,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4492,7 +4554,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4510,7 +4579,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4528,7 +4604,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4546,7 +4629,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4564,7 +4654,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4591,7 +4688,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4609,7 +4713,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4639,7 +4750,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
|
@ -4682,7 +4800,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4707,7 +4832,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4759,7 +4891,14 @@ components:
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
type: string
|
||||||
|
enum:
|
||||||
|
- average
|
||||||
|
- median
|
||||||
|
- categorical_count
|
||||||
|
- accuracy
|
||||||
|
title: AggregationFunctionType
|
||||||
|
description: A type of aggregation function.
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) Aggregation functions to apply to the scores of each row. If
|
(Optional) Aggregation functions to apply to the scores of each row. If
|
||||||
not provided, no aggregation will be performed.
|
not provided, no aggregation will be performed.
|
||||||
|
@ -4939,12 +5078,10 @@ components:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
The ID of the dataset to use to run the benchmark.
|
The ID of the dataset to use to run the benchmark.
|
||||||
scoring_functions:
|
scoring_fn_ids:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
type: string
|
||||||
description: >-
|
|
||||||
The scoring functions with parameters to use for this benchmark.
|
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -4964,7 +5101,7 @@ components:
|
||||||
- provider_id
|
- provider_id
|
||||||
- type
|
- type
|
||||||
- dataset_id
|
- dataset_id
|
||||||
- scoring_functions
|
- scoring_fn_ids
|
||||||
- metadata
|
- metadata
|
||||||
title: Benchmark
|
title: Benchmark
|
||||||
DataSource:
|
DataSource:
|
||||||
|
@ -5180,28 +5317,10 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: scoring_function
|
const: scoring_function
|
||||||
default: scoring_function
|
default: scoring_function
|
||||||
scoring_fn_type:
|
fn:
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- custom_llm_as_judge
|
|
||||||
- regex_parser
|
|
||||||
- regex_parser_math_response
|
|
||||||
- equality
|
|
||||||
- subset_of
|
|
||||||
- factuality
|
|
||||||
- faithfulness
|
|
||||||
- answer_correctness
|
|
||||||
- answer_relevancy
|
|
||||||
- answer_similarity
|
|
||||||
- context_entity_recall
|
|
||||||
- context_precision
|
|
||||||
- context_recall
|
|
||||||
- context_relevancy
|
|
||||||
description: The type of scoring function.
|
|
||||||
params:
|
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
$ref: '#/components/schemas/ScoringFnParams'
|
||||||
description: >-
|
description: >-
|
||||||
(Optional) The parameters for the scoring function.
|
The scoring function type and parameters.
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
|
@ -5220,7 +5339,7 @@ components:
|
||||||
- provider_resource_id
|
- provider_resource_id
|
||||||
- provider_id
|
- provider_id
|
||||||
- type
|
- type
|
||||||
- scoring_fn_type
|
- fn
|
||||||
- metadata
|
- metadata
|
||||||
title: ScoringFn
|
title: ScoringFn
|
||||||
Shield:
|
Shield:
|
||||||
|
@ -5842,6 +5961,61 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListRoutesResponse
|
title: ListRoutesResponse
|
||||||
|
ScoringFnTypeInfo:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- custom_llm_as_judge
|
||||||
|
- regex_parser
|
||||||
|
- regex_parser_math_response
|
||||||
|
- equality
|
||||||
|
- subset_of
|
||||||
|
- factuality
|
||||||
|
- faithfulness
|
||||||
|
- answer_correctness
|
||||||
|
- answer_relevancy
|
||||||
|
- answer_similarity
|
||||||
|
- context_entity_recall
|
||||||
|
- context_precision
|
||||||
|
- context_recall
|
||||||
|
- context_relevancy
|
||||||
|
description: The type of scoring function.
|
||||||
|
description:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
A description of the scoring function type. - E.g. Write your custom judge
|
||||||
|
prompt to score the answer.
|
||||||
|
supported_purposes:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- post-training/messages
|
||||||
|
- eval/question-answer
|
||||||
|
title: DatasetPurpose
|
||||||
|
description: >-
|
||||||
|
Purpose of the dataset. Each type has a different column format.
|
||||||
|
description: >-
|
||||||
|
The purposes that this scoring function can be used for.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- description
|
||||||
|
- supported_purposes
|
||||||
|
title: ScoringFnTypeInfo
|
||||||
|
ListScoringFunctionTypesResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/ScoringFnTypeInfo'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- data
|
||||||
|
title: ListScoringFunctionTypesResponse
|
||||||
ListScoringFunctionsResponse:
|
ListScoringFunctionsResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6491,12 +6665,12 @@ components:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
The ID of the dataset to use to run the benchmark.
|
The ID of the dataset to use to run the benchmark.
|
||||||
scoring_functions:
|
scoring_fn_ids:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/ScoringFnParams'
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
The scoring functions with parameters to use for this benchmark.
|
List of scoring function ids to use for this benchmark.
|
||||||
benchmark_id:
|
benchmark_id:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -6517,7 +6691,7 @@ components:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- dataset_id
|
- dataset_id
|
||||||
- scoring_functions
|
- scoring_fn_ids
|
||||||
title: RegisterBenchmarkRequest
|
title: RegisterBenchmarkRequest
|
||||||
RegisterDatasetRequest:
|
RegisterDatasetRequest:
|
||||||
type: object
|
type: object
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue