forked from phoenix-oss/llama-stack-mirror
remove aggregation functions
This commit is contained in:
parent
64388de068
commit
2723b05164
3 changed files with 7 additions and 262 deletions
142
docs/_static/llama-stack-spec.html
vendored
142
docs/_static/llama-stack-spec.html
vendored
|
@ -6505,36 +6505,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "equality",
|
"const": "equality",
|
||||||
"default": "equality"
|
"default": "equality"
|
||||||
},
|
|
||||||
"equality": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"aggregation_functions"
|
|
||||||
],
|
|
||||||
"title": "BasicGraderParams"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type"
|
||||||
"equality"
|
|
||||||
],
|
],
|
||||||
"title": "EqualityGrader"
|
"title": "EqualityGrader"
|
||||||
},
|
},
|
||||||
|
@ -6545,36 +6520,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "factuality",
|
"const": "factuality",
|
||||||
"default": "factuality"
|
"default": "factuality"
|
||||||
},
|
|
||||||
"factuality": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"aggregation_functions"
|
|
||||||
],
|
|
||||||
"title": "BasicGraderParams"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type"
|
||||||
"factuality"
|
|
||||||
],
|
],
|
||||||
"title": "FactualityGrader"
|
"title": "FactualityGrader"
|
||||||
},
|
},
|
||||||
|
@ -6585,36 +6535,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "faithfulness",
|
"const": "faithfulness",
|
||||||
"default": "faithfulness"
|
"default": "faithfulness"
|
||||||
},
|
|
||||||
"faithfulness": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"aggregation_functions"
|
|
||||||
],
|
|
||||||
"title": "BasicGraderParams"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type"
|
||||||
"faithfulness"
|
|
||||||
],
|
],
|
||||||
"title": "FaithfulnessGrader"
|
"title": "FaithfulnessGrader"
|
||||||
},
|
},
|
||||||
|
@ -6733,28 +6658,13 @@
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"model",
|
"model",
|
||||||
"prompt",
|
"prompt",
|
||||||
"score_regexes",
|
"score_regexes"
|
||||||
"aggregation_functions"
|
|
||||||
],
|
],
|
||||||
"title": "LlmGraderParams"
|
"title": "LlmGraderParams"
|
||||||
}
|
}
|
||||||
|
@ -6782,26 +6692,11 @@
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"parsing_regexes",
|
"parsing_regexes"
|
||||||
"aggregation_functions"
|
|
||||||
],
|
],
|
||||||
"title": "RegexParserGraderParams"
|
"title": "RegexParserGraderParams"
|
||||||
}
|
}
|
||||||
|
@ -6820,36 +6715,11 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "subset_of",
|
"const": "subset_of",
|
||||||
"default": "subset_of"
|
"default": "subset_of"
|
||||||
},
|
|
||||||
"subset_of": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"aggregation_functions": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"enum": [
|
|
||||||
"average",
|
|
||||||
"median",
|
|
||||||
"categorical_count",
|
|
||||||
"accuracy"
|
|
||||||
],
|
|
||||||
"title": "AggregationFunctionType",
|
|
||||||
"description": "A type of aggregation function."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"aggregation_functions"
|
|
||||||
],
|
|
||||||
"title": "BasicGraderParams"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type"
|
||||||
"subset_of"
|
|
||||||
],
|
],
|
||||||
"title": "SubsetOfGrader"
|
"title": "SubsetOfGrader"
|
||||||
},
|
},
|
||||||
|
|
100
docs/_static/llama-stack-spec.yaml
vendored
100
docs/_static/llama-stack-spec.yaml
vendored
|
@ -4557,28 +4557,9 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: equality
|
const: equality
|
||||||
default: equality
|
default: equality
|
||||||
equality:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- aggregation_functions
|
|
||||||
title: BasicGraderParams
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- equality
|
|
||||||
title: EqualityGrader
|
title: EqualityGrader
|
||||||
FactualityGrader:
|
FactualityGrader:
|
||||||
type: object
|
type: object
|
||||||
|
@ -4587,28 +4568,9 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: factuality
|
const: factuality
|
||||||
default: factuality
|
default: factuality
|
||||||
factuality:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- aggregation_functions
|
|
||||||
title: BasicGraderParams
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- factuality
|
|
||||||
title: FactualityGrader
|
title: FactualityGrader
|
||||||
FaithfulnessGrader:
|
FaithfulnessGrader:
|
||||||
type: object
|
type: object
|
||||||
|
@ -4617,28 +4579,9 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: faithfulness
|
const: faithfulness
|
||||||
default: faithfulness
|
default: faithfulness
|
||||||
faithfulness:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- aggregation_functions
|
|
||||||
title: BasicGraderParams
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- faithfulness
|
|
||||||
title: FaithfulnessGrader
|
title: FaithfulnessGrader
|
||||||
Grader:
|
Grader:
|
||||||
type: object
|
type: object
|
||||||
|
@ -4711,23 +4654,11 @@ components:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- prompt
|
- prompt
|
||||||
- score_regexes
|
- score_regexes
|
||||||
- aggregation_functions
|
|
||||||
title: LlmGraderParams
|
title: LlmGraderParams
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
|
@ -4748,21 +4679,9 @@ components:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- parsing_regexes
|
- parsing_regexes
|
||||||
- aggregation_functions
|
|
||||||
title: RegexParserGraderParams
|
title: RegexParserGraderParams
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
|
@ -4776,28 +4695,9 @@ components:
|
||||||
type: string
|
type: string
|
||||||
const: subset_of
|
const: subset_of
|
||||||
default: subset_of
|
default: subset_of
|
||||||
subset_of:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
aggregation_functions:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- average
|
|
||||||
- median
|
|
||||||
- categorical_count
|
|
||||||
- accuracy
|
|
||||||
title: AggregationFunctionType
|
|
||||||
description: A type of aggregation function.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- aggregation_functions
|
|
||||||
title: BasicGraderParams
|
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- subset_of
|
|
||||||
title: SubsetOfGrader
|
title: SubsetOfGrader
|
||||||
Model:
|
Model:
|
||||||
type: object
|
type: object
|
||||||
|
|
|
@ -13,8 +13,8 @@ from typing import (
|
||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Protocol,
|
Protocol,
|
||||||
Union,
|
|
||||||
runtime_checkable,
|
runtime_checkable,
|
||||||
|
Union,
|
||||||
)
|
)
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
@ -63,35 +63,14 @@ class GraderTypeInfo(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AggregationFunctionType(Enum):
|
|
||||||
"""
|
|
||||||
A type of aggregation function.
|
|
||||||
:cvar average: Average the scores of each row.
|
|
||||||
:cvar median: Median the scores of each row.
|
|
||||||
:cvar categorical_count: Count the number of rows that match each category.
|
|
||||||
:cvar accuracy: Number of correct results over total results.
|
|
||||||
"""
|
|
||||||
|
|
||||||
average = "average"
|
|
||||||
median = "median"
|
|
||||||
categorical_count = "categorical_count"
|
|
||||||
accuracy = "accuracy"
|
|
||||||
|
|
||||||
|
|
||||||
class BasicGraderParams(BaseModel):
|
|
||||||
aggregation_functions: List[AggregationFunctionType]
|
|
||||||
|
|
||||||
|
|
||||||
class LlmGraderParams(BaseModel):
|
class LlmGraderParams(BaseModel):
|
||||||
model: str
|
model: str
|
||||||
prompt: str
|
prompt: str
|
||||||
score_regexes: List[str]
|
score_regexes: List[str]
|
||||||
aggregation_functions: List[AggregationFunctionType]
|
|
||||||
|
|
||||||
|
|
||||||
class RegexParserGraderParams(BaseModel):
|
class RegexParserGraderParams(BaseModel):
|
||||||
parsing_regexes: List[str]
|
parsing_regexes: List[str]
|
||||||
aggregation_functions: List[AggregationFunctionType]
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
@ -109,25 +88,21 @@ class RegexParserGrader(BaseModel):
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EqualityGrader(BaseModel):
|
class EqualityGrader(BaseModel):
|
||||||
type: Literal["equality"] = "equality"
|
type: Literal["equality"] = "equality"
|
||||||
equality: BasicGraderParams
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class SubsetOfGrader(BaseModel):
|
class SubsetOfGrader(BaseModel):
|
||||||
type: Literal["subset_of"] = "subset_of"
|
type: Literal["subset_of"] = "subset_of"
|
||||||
subset_of: BasicGraderParams
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class FactualityGrader(BaseModel):
|
class FactualityGrader(BaseModel):
|
||||||
type: Literal["factuality"] = "factuality"
|
type: Literal["factuality"] = "factuality"
|
||||||
factuality: BasicGraderParams
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class FaithfulnessGrader(BaseModel):
|
class FaithfulnessGrader(BaseModel):
|
||||||
type: Literal["faithfulness"] = "faithfulness"
|
type: Literal["faithfulness"] = "faithfulness"
|
||||||
faithfulness: BasicGraderParams
|
|
||||||
|
|
||||||
|
|
||||||
GraderDefinition = register_schema(
|
GraderDefinition = register_schema(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue