mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 15:23:51 +00:00
a
This commit is contained in:
parent
a9d8fdef90
commit
78ef9c605f
5 changed files with 125 additions and 25 deletions
|
@ -231,9 +231,9 @@ Before finalizing documentation, verify:
|
||||||
[x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management
|
[x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management
|
||||||
[x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
|
[x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
|
||||||
[x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
|
[x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
|
||||||
13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
|
[x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
|
||||||
14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
|
[x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
|
||||||
15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
|
[x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
|
||||||
16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
|
16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
|
||||||
17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
|
17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
|
||||||
18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
|
18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
|
||||||
|
|
61
docs/_static/llama-stack-spec.html
vendored
61
docs/_static/llama-stack-spec.html
vendored
|
@ -9301,7 +9301,8 @@
|
||||||
"categorical_count",
|
"categorical_count",
|
||||||
"accuracy"
|
"accuracy"
|
||||||
],
|
],
|
||||||
"title": "AggregationFunctionType"
|
"title": "AggregationFunctionType",
|
||||||
|
"description": "Types of aggregation functions for scoring results."
|
||||||
},
|
},
|
||||||
"BasicScoringFnParams": {
|
"BasicScoringFnParams": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9309,13 +9310,15 @@
|
||||||
"type": {
|
"type": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParamsType",
|
"$ref": "#/components/schemas/ScoringFnParamsType",
|
||||||
"const": "basic",
|
"const": "basic",
|
||||||
"default": "basic"
|
"default": "basic",
|
||||||
|
"description": "The type of scoring function parameters, always basic"
|
||||||
},
|
},
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||||
}
|
},
|
||||||
|
"description": "Aggregation functions to apply to the scores of each row"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -9323,7 +9326,8 @@
|
||||||
"type",
|
"type",
|
||||||
"aggregation_functions"
|
"aggregation_functions"
|
||||||
],
|
],
|
||||||
"title": "BasicScoringFnParams"
|
"title": "BasicScoringFnParams",
|
||||||
|
"description": "Parameters for basic scoring function configuration."
|
||||||
},
|
},
|
||||||
"BenchmarkConfig": {
|
"BenchmarkConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9375,25 +9379,30 @@
|
||||||
"type": {
|
"type": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParamsType",
|
"$ref": "#/components/schemas/ScoringFnParamsType",
|
||||||
"const": "llm_as_judge",
|
"const": "llm_as_judge",
|
||||||
"default": "llm_as_judge"
|
"default": "llm_as_judge",
|
||||||
|
"description": "The type of scoring function parameters, always llm_as_judge"
|
||||||
},
|
},
|
||||||
"judge_model": {
|
"judge_model": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "Identifier of the LLM model to use as a judge for scoring"
|
||||||
},
|
},
|
||||||
"prompt_template": {
|
"prompt_template": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "(Optional) Custom prompt template for the judge model"
|
||||||
},
|
},
|
||||||
"judge_score_regexes": {
|
"judge_score_regexes": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "Regexes to extract the answer from generated response"
|
||||||
},
|
},
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||||
}
|
},
|
||||||
|
"description": "Aggregation functions to apply to the scores of each row"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -9403,7 +9412,8 @@
|
||||||
"judge_score_regexes",
|
"judge_score_regexes",
|
||||||
"aggregation_functions"
|
"aggregation_functions"
|
||||||
],
|
],
|
||||||
"title": "LLMAsJudgeScoringFnParams"
|
"title": "LLMAsJudgeScoringFnParams",
|
||||||
|
"description": "Parameters for LLM-as-judge scoring function configuration."
|
||||||
},
|
},
|
||||||
"ModelCandidate": {
|
"ModelCandidate": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -9441,19 +9451,22 @@
|
||||||
"type": {
|
"type": {
|
||||||
"$ref": "#/components/schemas/ScoringFnParamsType",
|
"$ref": "#/components/schemas/ScoringFnParamsType",
|
||||||
"const": "regex_parser",
|
"const": "regex_parser",
|
||||||
"default": "regex_parser"
|
"default": "regex_parser",
|
||||||
|
"description": "The type of scoring function parameters, always regex_parser"
|
||||||
},
|
},
|
||||||
"parsing_regexes": {
|
"parsing_regexes": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "Regex to extract the answer from generated response"
|
||||||
},
|
},
|
||||||
"aggregation_functions": {
|
"aggregation_functions": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||||
}
|
},
|
||||||
|
"description": "Aggregation functions to apply to the scores of each row"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -9462,7 +9475,8 @@
|
||||||
"parsing_regexes",
|
"parsing_regexes",
|
||||||
"aggregation_functions"
|
"aggregation_functions"
|
||||||
],
|
],
|
||||||
"title": "RegexParserScoringFnParams"
|
"title": "RegexParserScoringFnParams",
|
||||||
|
"description": "Parameters for regex parser scoring function configuration."
|
||||||
},
|
},
|
||||||
"ScoringFnParams": {
|
"ScoringFnParams": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
|
@ -9492,7 +9506,8 @@
|
||||||
"regex_parser",
|
"regex_parser",
|
||||||
"basic"
|
"basic"
|
||||||
],
|
],
|
||||||
"title": "ScoringFnParamsType"
|
"title": "ScoringFnParamsType",
|
||||||
|
"description": "Types of scoring function parameter configurations."
|
||||||
},
|
},
|
||||||
"EvaluateRowsRequest": {
|
"EvaluateRowsRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -10765,9 +10780,9 @@
|
||||||
"tool",
|
"tool",
|
||||||
"tool_group"
|
"tool_group"
|
||||||
],
|
],
|
||||||
"title": "ResourceType",
|
|
||||||
"const": "scoring_function",
|
"const": "scoring_function",
|
||||||
"default": "scoring_function"
|
"default": "scoring_function",
|
||||||
|
"description": "The resource type, always scoring_function"
|
||||||
},
|
},
|
||||||
"description": {
|
"description": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
@ -10812,7 +10827,8 @@
|
||||||
"metadata",
|
"metadata",
|
||||||
"return_type"
|
"return_type"
|
||||||
],
|
],
|
||||||
"title": "ScoringFn"
|
"title": "ScoringFn",
|
||||||
|
"description": "A scoring function resource for evaluating model outputs."
|
||||||
},
|
},
|
||||||
"StringType": {
|
"StringType": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -16105,20 +16121,23 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"dataset_id": {
|
"dataset_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "(Optional) The identifier of the dataset that was scored"
|
||||||
},
|
},
|
||||||
"results": {
|
"results": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/components/schemas/ScoringResult"
|
"$ref": "#/components/schemas/ScoringResult"
|
||||||
}
|
},
|
||||||
|
"description": "A map of scoring function name to ScoringResult"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"results"
|
"results"
|
||||||
],
|
],
|
||||||
"title": "ScoreBatchResponse"
|
"title": "ScoreBatchResponse",
|
||||||
|
"description": "Response from batch scoring operations on datasets."
|
||||||
},
|
},
|
||||||
"AlgorithmConfig": {
|
"AlgorithmConfig": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
|
|
41
docs/_static/llama-stack-spec.yaml
vendored
41
docs/_static/llama-stack-spec.yaml
vendored
|
@ -6681,6 +6681,8 @@ components:
|
||||||
- categorical_count
|
- categorical_count
|
||||||
- accuracy
|
- accuracy
|
||||||
title: AggregationFunctionType
|
title: AggregationFunctionType
|
||||||
|
description: >-
|
||||||
|
Types of aggregation functions for scoring results.
|
||||||
BasicScoringFnParams:
|
BasicScoringFnParams:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6688,15 +6690,21 @@ components:
|
||||||
$ref: '#/components/schemas/ScoringFnParamsType'
|
$ref: '#/components/schemas/ScoringFnParamsType'
|
||||||
const: basic
|
const: basic
|
||||||
default: basic
|
default: basic
|
||||||
|
description: >-
|
||||||
|
The type of scoring function parameters, always basic
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
$ref: '#/components/schemas/AggregationFunctionType'
|
||||||
|
description: >-
|
||||||
|
Aggregation functions to apply to the scores of each row
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- aggregation_functions
|
- aggregation_functions
|
||||||
title: BasicScoringFnParams
|
title: BasicScoringFnParams
|
||||||
|
description: >-
|
||||||
|
Parameters for basic scoring function configuration.
|
||||||
BenchmarkConfig:
|
BenchmarkConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6738,18 +6746,28 @@ components:
|
||||||
$ref: '#/components/schemas/ScoringFnParamsType'
|
$ref: '#/components/schemas/ScoringFnParamsType'
|
||||||
const: llm_as_judge
|
const: llm_as_judge
|
||||||
default: llm_as_judge
|
default: llm_as_judge
|
||||||
|
description: >-
|
||||||
|
The type of scoring function parameters, always llm_as_judge
|
||||||
judge_model:
|
judge_model:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
Identifier of the LLM model to use as a judge for scoring
|
||||||
prompt_template:
|
prompt_template:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) Custom prompt template for the judge model
|
||||||
judge_score_regexes:
|
judge_score_regexes:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
Regexes to extract the answer from generated response
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
$ref: '#/components/schemas/AggregationFunctionType'
|
||||||
|
description: >-
|
||||||
|
Aggregation functions to apply to the scores of each row
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
|
@ -6757,6 +6775,8 @@ components:
|
||||||
- judge_score_regexes
|
- judge_score_regexes
|
||||||
- aggregation_functions
|
- aggregation_functions
|
||||||
title: LLMAsJudgeScoringFnParams
|
title: LLMAsJudgeScoringFnParams
|
||||||
|
description: >-
|
||||||
|
Parameters for LLM-as-judge scoring function configuration.
|
||||||
ModelCandidate:
|
ModelCandidate:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6789,20 +6809,28 @@ components:
|
||||||
$ref: '#/components/schemas/ScoringFnParamsType'
|
$ref: '#/components/schemas/ScoringFnParamsType'
|
||||||
const: regex_parser
|
const: regex_parser
|
||||||
default: regex_parser
|
default: regex_parser
|
||||||
|
description: >-
|
||||||
|
The type of scoring function parameters, always regex_parser
|
||||||
parsing_regexes:
|
parsing_regexes:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
Regex to extract the answer from generated response
|
||||||
aggregation_functions:
|
aggregation_functions:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/AggregationFunctionType'
|
$ref: '#/components/schemas/AggregationFunctionType'
|
||||||
|
description: >-
|
||||||
|
Aggregation functions to apply to the scores of each row
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- parsing_regexes
|
- parsing_regexes
|
||||||
- aggregation_functions
|
- aggregation_functions
|
||||||
title: RegexParserScoringFnParams
|
title: RegexParserScoringFnParams
|
||||||
|
description: >-
|
||||||
|
Parameters for regex parser scoring function configuration.
|
||||||
ScoringFnParams:
|
ScoringFnParams:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
|
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
|
||||||
|
@ -6821,6 +6849,8 @@ components:
|
||||||
- regex_parser
|
- regex_parser
|
||||||
- basic
|
- basic
|
||||||
title: ScoringFnParamsType
|
title: ScoringFnParamsType
|
||||||
|
description: >-
|
||||||
|
Types of scoring function parameter configurations.
|
||||||
EvaluateRowsRequest:
|
EvaluateRowsRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7742,9 +7772,10 @@ components:
|
||||||
- benchmark
|
- benchmark
|
||||||
- tool
|
- tool
|
||||||
- tool_group
|
- tool_group
|
||||||
title: ResourceType
|
|
||||||
const: scoring_function
|
const: scoring_function
|
||||||
default: scoring_function
|
default: scoring_function
|
||||||
|
description: >-
|
||||||
|
The resource type, always scoring_function
|
||||||
description:
|
description:
|
||||||
type: string
|
type: string
|
||||||
metadata:
|
metadata:
|
||||||
|
@ -7769,6 +7800,8 @@ components:
|
||||||
- metadata
|
- metadata
|
||||||
- return_type
|
- return_type
|
||||||
title: ScoringFn
|
title: ScoringFn
|
||||||
|
description: >-
|
||||||
|
A scoring function resource for evaluating model outputs.
|
||||||
StringType:
|
StringType:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -11587,14 +11620,20 @@ components:
|
||||||
properties:
|
properties:
|
||||||
dataset_id:
|
dataset_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) The identifier of the dataset that was scored
|
||||||
results:
|
results:
|
||||||
type: object
|
type: object
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
$ref: '#/components/schemas/ScoringResult'
|
$ref: '#/components/schemas/ScoringResult'
|
||||||
|
description: >-
|
||||||
|
A map of scoring function name to ScoringResult
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- results
|
- results
|
||||||
title: ScoreBatchResponse
|
title: ScoreBatchResponse
|
||||||
|
description: >-
|
||||||
|
Response from batch scoring operations on datasets.
|
||||||
AlgorithmConfig:
|
AlgorithmConfig:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||||
|
|
|
@ -31,6 +31,11 @@ class ScoringResult(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoreBatchResponse(BaseModel):
|
class ScoreBatchResponse(BaseModel):
|
||||||
|
"""Response from batch scoring operations on datasets.
|
||||||
|
|
||||||
|
:param dataset_id: (Optional) The identifier of the dataset that was scored
|
||||||
|
:param results: A map of scoring function name to ScoringResult
|
||||||
|
"""
|
||||||
dataset_id: str | None = None
|
dataset_id: str | None = None
|
||||||
results: dict[str, ScoringResult]
|
results: dict[str, ScoringResult]
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
|
||||||
# with standard metrics so they can be rolled up?
|
# with standard metrics so they can be rolled up?
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringFnParamsType(StrEnum):
|
class ScoringFnParamsType(StrEnum):
|
||||||
|
"""Types of scoring function parameter configurations.
|
||||||
|
|
||||||
|
:cvar llm_as_judge: Use an LLM model to evaluate and score responses
|
||||||
|
:cvar regex_parser: Use regex patterns to extract and score specific parts of responses
|
||||||
|
:cvar basic: Basic scoring with simple aggregation functions
|
||||||
|
"""
|
||||||
llm_as_judge = "llm_as_judge"
|
llm_as_judge = "llm_as_judge"
|
||||||
regex_parser = "regex_parser"
|
regex_parser = "regex_parser"
|
||||||
basic = "basic"
|
basic = "basic"
|
||||||
|
@ -32,6 +38,14 @@ class ScoringFnParamsType(StrEnum):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AggregationFunctionType(StrEnum):
|
class AggregationFunctionType(StrEnum):
|
||||||
|
"""Types of aggregation functions for scoring results.
|
||||||
|
|
||||||
|
:cvar average: Calculate the arithmetic mean of scores
|
||||||
|
:cvar weighted_average: Calculate a weighted average of scores
|
||||||
|
:cvar median: Calculate the median value of scores
|
||||||
|
:cvar categorical_count: Count occurrences of categorical values
|
||||||
|
:cvar accuracy: Calculate accuracy as the proportion of correct answers
|
||||||
|
"""
|
||||||
average = "average"
|
average = "average"
|
||||||
weighted_average = "weighted_average"
|
weighted_average = "weighted_average"
|
||||||
median = "median"
|
median = "median"
|
||||||
|
@ -41,6 +55,14 @@ class AggregationFunctionType(StrEnum):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class LLMAsJudgeScoringFnParams(BaseModel):
|
class LLMAsJudgeScoringFnParams(BaseModel):
|
||||||
|
"""Parameters for LLM-as-judge scoring function configuration.
|
||||||
|
|
||||||
|
:param type: The type of scoring function parameters, always llm_as_judge
|
||||||
|
:param judge_model: Identifier of the LLM model to use as a judge for scoring
|
||||||
|
:param prompt_template: (Optional) Custom prompt template for the judge model
|
||||||
|
:param judge_score_regexes: Regexes to extract the answer from generated response
|
||||||
|
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||||
|
"""
|
||||||
type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
|
type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
|
||||||
judge_model: str
|
judge_model: str
|
||||||
prompt_template: str | None = None
|
prompt_template: str | None = None
|
||||||
|
@ -56,6 +78,12 @@ class LLMAsJudgeScoringFnParams(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class RegexParserScoringFnParams(BaseModel):
|
class RegexParserScoringFnParams(BaseModel):
|
||||||
|
"""Parameters for regex parser scoring function configuration.
|
||||||
|
|
||||||
|
:param type: The type of scoring function parameters, always regex_parser
|
||||||
|
:param parsing_regexes: Regex to extract the answer from generated response
|
||||||
|
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||||
|
"""
|
||||||
type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
|
type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
|
||||||
parsing_regexes: list[str] = Field(
|
parsing_regexes: list[str] = Field(
|
||||||
description="Regex to extract the answer from generated response",
|
description="Regex to extract the answer from generated response",
|
||||||
|
@ -69,6 +97,11 @@ class RegexParserScoringFnParams(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class BasicScoringFnParams(BaseModel):
|
class BasicScoringFnParams(BaseModel):
|
||||||
|
"""Parameters for basic scoring function configuration.
|
||||||
|
|
||||||
|
:param type: The type of scoring function parameters, always basic
|
||||||
|
:param aggregation_functions: Aggregation functions to apply to the scores of each row
|
||||||
|
"""
|
||||||
type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
|
type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
|
||||||
aggregation_functions: list[AggregationFunctionType] = Field(
|
aggregation_functions: list[AggregationFunctionType] = Field(
|
||||||
description="Aggregation functions to apply to the scores of each row",
|
description="Aggregation functions to apply to the scores of each row",
|
||||||
|
@ -100,6 +133,10 @@ class CommonScoringFnFields(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringFn(CommonScoringFnFields, Resource):
|
class ScoringFn(CommonScoringFnFields, Resource):
|
||||||
|
"""A scoring function resource for evaluating model outputs.
|
||||||
|
|
||||||
|
:param type: The resource type, always scoring_function
|
||||||
|
"""
|
||||||
type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
|
type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue