This commit is contained in:
Xi Yan 2025-03-13 15:35:09 -07:00
parent 2cf769e05e
commit 819ffe0518
3 changed files with 190 additions and 176 deletions

View file

@ -962,7 +962,7 @@
}
}
},
"/v1/eval/rows": {
"/v1/eval/evaluate_rows": {
"post": {
"responses": {
"200": {
@ -3631,49 +3631,6 @@
}
}
},
"/v1/scoring/rows": {
"post": {
"responses": {
"200": {
"description": "ScoreResponse object containing rows and aggregated results",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Scoring"
],
"description": "Score a list of rows.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreRequest"
}
}
},
"required": true
}
}
},
"/v1/scoring/jobs": {
"post": {
"responses": {
@ -3717,6 +3674,49 @@
}
}
},
"/v1/scoring/score-rows": {
"post": {
"responses": {
"200": {
"description": "ScoreResponse object containing rows and aggregated results",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Scoring"
],
"description": "Score a list of rows.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreRowsRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": {
"post": {
"responses": {
@ -8714,7 +8714,7 @@
"type": "string",
"description": "A description of the scoring function type. - E.g. Write your custom judge prompt to score the answer."
},
"supported_purposes": {
"supported_dataset_purposes": {
"type": "array",
"items": {
"type": "string",
@ -8736,7 +8736,7 @@
"required": [
"type",
"description",
"supported_purposes"
"supported_dataset_purposes"
],
"title": "ScoringFnTypeInfo"
},
@ -10181,7 +10181,46 @@
],
"title": "SaveSpansToDatasetRequest"
},
"ScoreRequest": {
"ScoreDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"scoring_fn_ids": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"scoring_fn_ids"
],
"title": "ScoreDatasetRequest"
},
"ScoreBatchResponse": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"results": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
}
}
},
"additionalProperties": false,
"required": [
"results"
],
"title": "ScoreBatchResponse"
},
"ScoreRowsRequest": {
"type": "object",
"properties": {
"dataset_rows": {
@ -10226,7 +10265,7 @@
"dataset_rows",
"scoring_fn_ids"
],
"title": "ScoreRequest"
"title": "ScoreRowsRequest"
},
"ScoreResponse": {
"type": "object",
@ -10246,45 +10285,6 @@
"title": "ScoreResponse",
"description": "The response from scoring."
},
"ScoreDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"scoring_fn_ids": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"scoring_fn_ids"
],
"title": "ScoreDatasetRequest"
},
"ScoreBatchResponse": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"results": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
}
}
},
"additionalProperties": false,
"required": [
"results"
],
"title": "ScoreBatchResponse"
},
"AlgorithmConfig": {
"oneOf": [
{

View file

@ -659,7 +659,7 @@ paths:
schema:
$ref: '#/components/schemas/EvaluateBenchmarkRequest'
required: true
/v1/eval/rows:
/v1/eval/evaluate_rows:
post:
responses:
'200':
@ -2467,36 +2467,6 @@ paths:
schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true
/v1/scoring/rows:
post:
responses:
'200':
description: >-
ScoreResponse object containing rows and aggregated results
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: Score a list of rows.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreRequest'
required: true
/v1/scoring/jobs:
post:
responses:
@ -2526,6 +2496,36 @@ paths:
schema:
$ref: '#/components/schemas/ScoreDatasetRequest'
required: true
/v1/scoring/score-rows:
post:
responses:
'200':
description: >-
ScoreResponse object containing rows and aggregated results
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: Score a list of rows.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreRowsRequest'
required: true
/v1/post-training/supervised-fine-tune:
post:
responses:
@ -6019,7 +6019,7 @@ components:
description: >-
A description of the scoring function type. - E.g. Write your custom judge
prompt to score the answer.
supported_purposes:
supported_dataset_purposes:
type: array
items:
type: string
@ -6039,7 +6039,7 @@ components:
required:
- type
- description
- supported_purposes
- supported_dataset_purposes
title: ScoringFnTypeInfo
ListScoringFunctionTypesResponse:
type: object
@ -6982,47 +6982,6 @@ components:
- attributes_to_save
- dataset_id
title: SaveSpansToDatasetRequest
ScoreRequest:
type: object
properties:
dataset_rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows to score.
scoring_fn_ids:
type: array
items:
type: string
description: >-
The scoring function ids to use for the scoring.
additionalProperties: false
required:
- dataset_rows
- scoring_fn_ids
title: ScoreRequest
ScoreResponse:
type: object
properties:
results:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
A map of scoring function name to ScoringResult.
additionalProperties: false
required:
- results
title: ScoreResponse
description: The response from scoring.
ScoreDatasetRequest:
type: object
properties:
@ -7050,6 +7009,47 @@ components:
required:
- results
title: ScoreBatchResponse
ScoreRowsRequest:
type: object
properties:
dataset_rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows to score.
scoring_fn_ids:
type: array
items:
type: string
description: >-
The scoring function ids to use for the scoring.
additionalProperties: false
required:
- dataset_rows
- scoring_fn_ids
title: ScoreRowsRequest
ScoreResponse:
type: object
properties:
results:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
A map of scoring function name to ScoringResult.
additionalProperties: false
required:
- results
title: ScoreResponse
description: The response from scoring.
AlgorithmConfig:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'

View file

@ -12,16 +12,17 @@ from typing import (
Literal,
Optional,
Protocol,
Union,
runtime_checkable,
Union,
)
from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack.apis.datasets import DatasetPurpose
# Perhaps more structure can be imposed on these functions. Maybe they could be associated
# with standard metrics so they can be rolled up?
@ -93,6 +94,7 @@ class RegexParserScoringFnParams(BaseModel):
default_factory=list,
)
class CustomLLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
judge_model: str
@ -102,6 +104,7 @@ class CustomLLMAsJudgeScoringFnParams(BaseModel):
default_factory=list,
)
@json_schema_type
class RegexParserScoringFn(BaseModel):
type: Literal["regex_parser"] = "regex_parser"
@ -113,36 +116,43 @@ class RegexParserMathScoringFn(BaseModel):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
regex_parser_math_response: RegexParserScoringFnParams
# Scoring-function variant identified by the literal tag "equality".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class EqualityScoringFn(BaseModel):
# Discriminator: the only accepted value is "equality", and it is also the default.
type: Literal["equality"] = "equality"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
equality: BasicScoringFnParams
# Scoring-function variant identified by the literal tag "subset_of".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class SubsetOfScoringFn(BaseModel):
# Discriminator: the only accepted value is "subset_of", and it is also the default.
type: Literal["subset_of"] = "subset_of"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
subset_of: BasicScoringFnParams
# Scoring-function variant identified by the literal tag "factuality".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class FactualityScoringFn(BaseModel):
# Discriminator: the only accepted value is "factuality", and it is also the default.
type: Literal["factuality"] = "factuality"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
factuality: BasicScoringFnParams
# Scoring-function variant identified by the literal tag "faithfulness".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class FaithfulnessScoringFn(BaseModel):
# Discriminator: the only accepted value is "faithfulness", and it is also the default.
type: Literal["faithfulness"] = "faithfulness"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
faithfulness: BasicScoringFnParams
# Scoring-function variant identified by the literal tag "answer_correctness".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class AnswerCorrectnessScoringFn(BaseModel):
# Discriminator: the only accepted value is "answer_correctness", and it is also the default.
type: Literal["answer_correctness"] = "answer_correctness"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
answer_correctness: BasicScoringFnParams
# Scoring-function variant identified by the literal tag "answer_relevancy".
# NOTE(review): @json_schema_type is imported from llama_stack.schema_utils; presumably it
# registers the model for schema generation — confirm against that module.
@json_schema_type
class AnswerRelevancyScoringFn(BaseModel):
# Discriminator: the only accepted value is "answer_relevancy", and it is also the default.
type: Literal["answer_relevancy"] = "answer_relevancy"
# Parameters for this variant, stored under a field named after the tag.
# BasicScoringFnParams is defined outside the lines visible here.
answer_relevancy: BasicScoringFnParams
@json_schema_type
class AnswerSimilarityScoringFn(BaseModel):
type: Literal["answer_similarity"] = "answer_similarity"
@ -208,6 +218,7 @@ class CommonScoringFnFields(BaseModel):
:param fn: The scoring function type and parameters.
:param metadata: (Optional) Any additional metadata for this definition (e.g. description).
"""
fn: ScoringFnDefinition
metadata: Dict[str, Any] = Field(
default_factory=dict,
@ -217,7 +228,9 @@ class CommonScoringFnFields(BaseModel):
@json_schema_type
class ScoringFn(CommonScoringFnFields, Resource):
type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value
type: Literal[ResourceType.scoring_function.value] = (
ResourceType.scoring_function.value
)
@property
def scoring_fn_id(self) -> str:
@ -234,11 +247,12 @@ class ScoringFnTypeInfo(BaseModel):
:param type: The type of scoring function.
:param description: A description of the scoring function type.
- E.g. Write your custom judge prompt to score the answer.
:param supported_purposes: The purposes that this scoring function can be used for.
:param supported_dataset_purposes: The purposes that this scoring function can be used for.
"""
type: ScoringFunctionType
description: str
supported_purposes: List[DatasetPurpose] = Field(
supported_dataset_purposes: List[DatasetPurpose] = Field(
description="The supported purposes (supported dataset schema) that this scoring function can be used for. E.g. eval/question-answer",
default_factory=list,
)