mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 11:08:20 +00:00
score
This commit is contained in:
parent
2cf769e05e
commit
819ffe0518
3 changed files with 190 additions and 176 deletions
174
docs/_static/llama-stack-spec.html
vendored
174
docs/_static/llama-stack-spec.html
vendored
|
@ -962,7 +962,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/eval/rows": {
|
"/v1/eval/evaluate_rows": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -3631,49 +3631,6 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/scoring/rows": {
|
|
||||||
"post": {
|
|
||||||
"responses": {
|
|
||||||
"200": {
|
|
||||||
"description": "ScoreResponse object containing rows and aggregated results",
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/components/schemas/ScoreResponse"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"400": {
|
|
||||||
"$ref": "#/components/responses/BadRequest400"
|
|
||||||
},
|
|
||||||
"429": {
|
|
||||||
"$ref": "#/components/responses/TooManyRequests429"
|
|
||||||
},
|
|
||||||
"500": {
|
|
||||||
"$ref": "#/components/responses/InternalServerError500"
|
|
||||||
},
|
|
||||||
"default": {
|
|
||||||
"$ref": "#/components/responses/DefaultError"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tags": [
|
|
||||||
"Scoring"
|
|
||||||
],
|
|
||||||
"description": "Score a list of rows.",
|
|
||||||
"parameters": [],
|
|
||||||
"requestBody": {
|
|
||||||
"content": {
|
|
||||||
"application/json": {
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/components/schemas/ScoreRequest"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"/v1/scoring/jobs": {
|
"/v1/scoring/jobs": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -3717,6 +3674,49 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/scoring/score-rows": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "ScoreResponse object containing rows and aggregated results",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ScoreResponse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"$ref": "#/components/responses/BadRequest400"
|
||||||
|
},
|
||||||
|
"429": {
|
||||||
|
"$ref": "#/components/responses/TooManyRequests429"
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"$ref": "#/components/responses/InternalServerError500"
|
||||||
|
},
|
||||||
|
"default": {
|
||||||
|
"$ref": "#/components/responses/DefaultError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Scoring"
|
||||||
|
],
|
||||||
|
"description": "Score a list of rows.",
|
||||||
|
"parameters": [],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ScoreRowsRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/post-training/supervised-fine-tune": {
|
"/v1/post-training/supervised-fine-tune": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -8714,7 +8714,7 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "A description of the scoring function type. - E.g. Write your custom judge prompt to score the answer."
|
"description": "A description of the scoring function type. - E.g. Write your custom judge prompt to score the answer."
|
||||||
},
|
},
|
||||||
"supported_purposes": {
|
"supported_dataset_purposes": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
@ -8736,7 +8736,7 @@
|
||||||
"required": [
|
"required": [
|
||||||
"type",
|
"type",
|
||||||
"description",
|
"description",
|
||||||
"supported_purposes"
|
"supported_dataset_purposes"
|
||||||
],
|
],
|
||||||
"title": "ScoringFnTypeInfo"
|
"title": "ScoringFnTypeInfo"
|
||||||
},
|
},
|
||||||
|
@ -10181,7 +10181,46 @@
|
||||||
],
|
],
|
||||||
"title": "SaveSpansToDatasetRequest"
|
"title": "SaveSpansToDatasetRequest"
|
||||||
},
|
},
|
||||||
"ScoreRequest": {
|
"ScoreDatasetRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"dataset_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"scoring_fn_ids": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"dataset_id",
|
||||||
|
"scoring_fn_ids"
|
||||||
|
],
|
||||||
|
"title": "ScoreDatasetRequest"
|
||||||
|
},
|
||||||
|
"ScoreBatchResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"dataset_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"results": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"$ref": "#/components/schemas/ScoringResult"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"results"
|
||||||
|
],
|
||||||
|
"title": "ScoreBatchResponse"
|
||||||
|
},
|
||||||
|
"ScoreRowsRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"dataset_rows": {
|
"dataset_rows": {
|
||||||
|
@ -10226,7 +10265,7 @@
|
||||||
"dataset_rows",
|
"dataset_rows",
|
||||||
"scoring_fn_ids"
|
"scoring_fn_ids"
|
||||||
],
|
],
|
||||||
"title": "ScoreRequest"
|
"title": "ScoreRowsRequest"
|
||||||
},
|
},
|
||||||
"ScoreResponse": {
|
"ScoreResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -10246,45 +10285,6 @@
|
||||||
"title": "ScoreResponse",
|
"title": "ScoreResponse",
|
||||||
"description": "The response from scoring."
|
"description": "The response from scoring."
|
||||||
},
|
},
|
||||||
"ScoreDatasetRequest": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"dataset_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"scoring_fn_ids": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"dataset_id",
|
|
||||||
"scoring_fn_ids"
|
|
||||||
],
|
|
||||||
"title": "ScoreDatasetRequest"
|
|
||||||
},
|
|
||||||
"ScoreBatchResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"dataset_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"results": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"$ref": "#/components/schemas/ScoringResult"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"results"
|
|
||||||
],
|
|
||||||
"title": "ScoreBatchResponse"
|
|
||||||
},
|
|
||||||
"AlgorithmConfig": {
|
"AlgorithmConfig": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
|
|
148
docs/_static/llama-stack-spec.yaml
vendored
148
docs/_static/llama-stack-spec.yaml
vendored
|
@ -659,7 +659,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/EvaluateBenchmarkRequest'
|
$ref: '#/components/schemas/EvaluateBenchmarkRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/eval/rows:
|
/v1/eval/evaluate_rows:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -2467,36 +2467,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
|
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/scoring/rows:
|
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: >-
|
|
||||||
ScoreResponse object containing rows and aggregated results
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ScoreResponse'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Scoring
|
|
||||||
description: Score a list of rows.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ScoreRequest'
|
|
||||||
required: true
|
|
||||||
/v1/scoring/jobs:
|
/v1/scoring/jobs:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -2526,6 +2496,36 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/ScoreDatasetRequest'
|
$ref: '#/components/schemas/ScoreDatasetRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/scoring/score-rows:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: >-
|
||||||
|
ScoreResponse object containing rows and aggregated results
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ScoreResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Scoring
|
||||||
|
description: Score a list of rows.
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ScoreRowsRequest'
|
||||||
|
required: true
|
||||||
/v1/post-training/supervised-fine-tune:
|
/v1/post-training/supervised-fine-tune:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -6019,7 +6019,7 @@ components:
|
||||||
description: >-
|
description: >-
|
||||||
A description of the scoring function type. - E.g. Write your custom judge
|
A description of the scoring function type. - E.g. Write your custom judge
|
||||||
prompt to score the answer.
|
prompt to score the answer.
|
||||||
supported_purposes:
|
supported_dataset_purposes:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
@ -6039,7 +6039,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
- description
|
- description
|
||||||
- supported_purposes
|
- supported_dataset_purposes
|
||||||
title: ScoringFnTypeInfo
|
title: ScoringFnTypeInfo
|
||||||
ListScoringFunctionTypesResponse:
|
ListScoringFunctionTypesResponse:
|
||||||
type: object
|
type: object
|
||||||
|
@ -6982,47 +6982,6 @@ components:
|
||||||
- attributes_to_save
|
- attributes_to_save
|
||||||
- dataset_id
|
- dataset_id
|
||||||
title: SaveSpansToDatasetRequest
|
title: SaveSpansToDatasetRequest
|
||||||
ScoreRequest:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
dataset_rows:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
description: The rows to score.
|
|
||||||
scoring_fn_ids:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
The scoring function ids to use for the scoring.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- dataset_rows
|
|
||||||
- scoring_fn_ids
|
|
||||||
title: ScoreRequest
|
|
||||||
ScoreResponse:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
results:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
$ref: '#/components/schemas/ScoringResult'
|
|
||||||
description: >-
|
|
||||||
A map of scoring function name to ScoringResult.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- results
|
|
||||||
title: ScoreResponse
|
|
||||||
description: The response from scoring.
|
|
||||||
ScoreDatasetRequest:
|
ScoreDatasetRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7050,6 +7009,47 @@ components:
|
||||||
required:
|
required:
|
||||||
- results
|
- results
|
||||||
title: ScoreBatchResponse
|
title: ScoreBatchResponse
|
||||||
|
ScoreRowsRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
dataset_rows:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
description: The rows to score.
|
||||||
|
scoring_fn_ids:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The scoring function ids to use for the scoring.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- dataset_rows
|
||||||
|
- scoring_fn_ids
|
||||||
|
title: ScoreRowsRequest
|
||||||
|
ScoreResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
results:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
$ref: '#/components/schemas/ScoringResult'
|
||||||
|
description: >-
|
||||||
|
A map of scoring function name to ScoringResult.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- results
|
||||||
|
title: ScoreResponse
|
||||||
|
description: The response from scoring.
|
||||||
AlgorithmConfig:
|
AlgorithmConfig:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||||
|
|
|
@ -12,16 +12,17 @@ from typing import (
|
||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Protocol,
|
Protocol,
|
||||||
Union,
|
|
||||||
runtime_checkable,
|
runtime_checkable,
|
||||||
|
Union,
|
||||||
)
|
)
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
|
from llama_stack.apis.datasets import DatasetPurpose
|
||||||
|
|
||||||
from llama_stack.apis.resource import Resource, ResourceType
|
from llama_stack.apis.resource import Resource, ResourceType
|
||||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||||
from llama_stack.apis.datasets import DatasetPurpose
|
|
||||||
|
|
||||||
# Perhaps more structure can be imposed on these functions. Maybe they could be associated
|
# Perhaps more structure can be imposed on these functions. Maybe they could be associated
|
||||||
# with standard metrics so they can be rolled up?
|
# with standard metrics so they can be rolled up?
|
||||||
|
@ -93,6 +94,7 @@ class RegexParserScoringFnParams(BaseModel):
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CustomLLMAsJudgeScoringFnParams(BaseModel):
|
class CustomLLMAsJudgeScoringFnParams(BaseModel):
|
||||||
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
|
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
|
||||||
judge_model: str
|
judge_model: str
|
||||||
|
@ -102,6 +104,7 @@ class CustomLLMAsJudgeScoringFnParams(BaseModel):
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class RegexParserScoringFn(BaseModel):
|
class RegexParserScoringFn(BaseModel):
|
||||||
type: Literal["regex_parser"] = "regex_parser"
|
type: Literal["regex_parser"] = "regex_parser"
|
||||||
|
@ -113,36 +116,43 @@ class RegexParserMathScoringFn(BaseModel):
|
||||||
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
|
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
|
||||||
regex_parser_math_response: RegexParserScoringFnParams
|
regex_parser_math_response: RegexParserScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EqualityScoringFn(BaseModel):
|
class EqualityScoringFn(BaseModel):
|
||||||
type: Literal["equality"] = "equality"
|
type: Literal["equality"] = "equality"
|
||||||
equality: BasicScoringFnParams
|
equality: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class SubsetOfScoringFn(BaseModel):
|
class SubsetOfScoringFn(BaseModel):
|
||||||
type: Literal["subset_of"] = "subset_of"
|
type: Literal["subset_of"] = "subset_of"
|
||||||
subset_of: BasicScoringFnParams
|
subset_of: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class FactualityScoringFn(BaseModel):
|
class FactualityScoringFn(BaseModel):
|
||||||
type: Literal["factuality"] = "factuality"
|
type: Literal["factuality"] = "factuality"
|
||||||
factuality: BasicScoringFnParams
|
factuality: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class FaithfulnessScoringFn(BaseModel):
|
class FaithfulnessScoringFn(BaseModel):
|
||||||
type: Literal["faithfulness"] = "faithfulness"
|
type: Literal["faithfulness"] = "faithfulness"
|
||||||
faithfulness: BasicScoringFnParams
|
faithfulness: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AnswerCorrectnessScoringFn(BaseModel):
|
class AnswerCorrectnessScoringFn(BaseModel):
|
||||||
type: Literal["answer_correctness"] = "answer_correctness"
|
type: Literal["answer_correctness"] = "answer_correctness"
|
||||||
answer_correctness: BasicScoringFnParams
|
answer_correctness: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AnswerRelevancyScoringFn(BaseModel):
|
class AnswerRelevancyScoringFn(BaseModel):
|
||||||
type: Literal["answer_relevancy"] = "answer_relevancy"
|
type: Literal["answer_relevancy"] = "answer_relevancy"
|
||||||
answer_relevancy: BasicScoringFnParams
|
answer_relevancy: BasicScoringFnParams
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class AnswerSimilarityScoringFn(BaseModel):
|
class AnswerSimilarityScoringFn(BaseModel):
|
||||||
type: Literal["answer_similarity"] = "answer_similarity"
|
type: Literal["answer_similarity"] = "answer_similarity"
|
||||||
|
@ -205,9 +215,10 @@ ScoringFnDefinition = register_schema(
|
||||||
|
|
||||||
class CommonScoringFnFields(BaseModel):
|
class CommonScoringFnFields(BaseModel):
|
||||||
"""
|
"""
|
||||||
:param fn: The scoring function type and parameters.
|
:param fn: The scoring function type and parameters.
|
||||||
:param metadata: (Optional) Any additional metadata for this definition (e.g. description).
|
:param metadata: (Optional) Any additional metadata for this definition (e.g. description).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fn: ScoringFnDefinition
|
fn: ScoringFnDefinition
|
||||||
metadata: Dict[str, Any] = Field(
|
metadata: Dict[str, Any] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
|
@ -217,7 +228,9 @@ class CommonScoringFnFields(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringFn(CommonScoringFnFields, Resource):
|
class ScoringFn(CommonScoringFnFields, Resource):
|
||||||
type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value
|
type: Literal[ResourceType.scoring_function.value] = (
|
||||||
|
ResourceType.scoring_function.value
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def scoring_fn_id(self) -> str:
|
def scoring_fn_id(self) -> str:
|
||||||
|
@ -231,14 +244,15 @@ class ScoringFn(CommonScoringFnFields, Resource):
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringFnTypeInfo(BaseModel):
|
class ScoringFnTypeInfo(BaseModel):
|
||||||
"""
|
"""
|
||||||
:param type: The type of scoring function.
|
:param type: The type of scoring function.
|
||||||
:param description: A description of the scoring function type.
|
:param description: A description of the scoring function type.
|
||||||
- E.g. Write your custom judge prompt to score the answer.
|
- E.g. Write your custom judge prompt to score the answer.
|
||||||
:param supported_purposes: The purposes that this scoring function can be used for.
|
:param supported_dataset_purposes: The purposes that this scoring function can be used for.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
type: ScoringFunctionType
|
type: ScoringFunctionType
|
||||||
description: str
|
description: str
|
||||||
supported_purposes: List[DatasetPurpose] = Field(
|
supported_dataset_purposes: List[DatasetPurpose] = Field(
|
||||||
description="The supported purposes (supported dataset schema) that this scoring function can be used for. E.g. eval/question-answer",
|
description="The supported purposes (supported dataset schema) that this scoring function can be used for. E.g. eval/question-answer",
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
)
|
)
|
||||||
|
@ -261,16 +275,16 @@ class ListScoringFunctionTypesResponse(BaseModel):
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class ScoringFunctions(Protocol):
|
class ScoringFunctions(Protocol):
|
||||||
@webmethod(route="/scoring-functions", method="GET")
|
@webmethod(route="/scoring-functions", method="GET")
|
||||||
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
|
||||||
"""
|
"""
|
||||||
List all registered scoring functions.
|
List all registered scoring functions.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/scoring-functions/types", method="GET")
|
@webmethod(route="/scoring-functions/types", method="GET")
|
||||||
async def list_scoring_function_types(self) -> ListScoringFunctionTypesResponse:
|
async def list_scoring_function_types(self) -> ListScoringFunctionTypesResponse:
|
||||||
"""
|
"""
|
||||||
List all available scoring function types information and how to use them.
|
List all available scoring function types information and how to use them.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -278,7 +292,7 @@ class ScoringFunctions(Protocol):
|
||||||
async def get_scoring_function(
|
async def get_scoring_function(
|
||||||
self,
|
self,
|
||||||
scoring_fn_id: str,
|
scoring_fn_id: str,
|
||||||
) -> Optional[ScoringFn]:
|
) -> Optional[ScoringFn]:
|
||||||
"""
|
"""
|
||||||
Get a scoring function by its ID.
|
Get a scoring function by its ID.
|
||||||
:param scoring_fn_id: The ID of the scoring function to get.
|
:param scoring_fn_id: The ID of the scoring function to get.
|
||||||
|
@ -302,12 +316,12 @@ class ScoringFunctions(Protocol):
|
||||||
- E.g. {"description": "This scoring function is used for ..."}
|
- E.g. {"description": "This scoring function is used for ..."}
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
|
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
|
||||||
async def unregister_scoring_function(
|
async def unregister_scoring_function(
|
||||||
self,
|
self,
|
||||||
scoring_fn_id: str,
|
scoring_fn_id: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Unregister a scoring function by its ID.
|
Unregister a scoring function by its ID.
|
||||||
:param scoring_fn_id: The ID of the scoring function to unregister.
|
:param scoring_fn_id: The ID of the scoring function to unregister.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue