This commit is contained in:
Xi Yan 2025-03-13 15:35:09 -07:00
parent 2cf769e05e
commit 819ffe0518
3 changed files with 190 additions and 176 deletions

View file

@ -962,7 +962,7 @@
} }
} }
}, },
"/v1/eval/rows": { "/v1/eval/evaluate_rows": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@ -3631,49 +3631,6 @@
} }
} }
}, },
"/v1/scoring/rows": {
"post": {
"responses": {
"200": {
"description": "ScoreResponse object containing rows and aggregated results",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Scoring"
],
"description": "Score a list of rows.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreRequest"
}
}
},
"required": true
}
}
},
"/v1/scoring/jobs": { "/v1/scoring/jobs": {
"post": { "post": {
"responses": { "responses": {
@ -3717,6 +3674,49 @@
} }
} }
}, },
"/v1/scoring/score-rows": {
"post": {
"responses": {
"200": {
"description": "ScoreResponse object containing rows and aggregated results",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Scoring"
],
"description": "Score a list of rows.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScoreRowsRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": { "/v1/post-training/supervised-fine-tune": {
"post": { "post": {
"responses": { "responses": {
@ -8714,7 +8714,7 @@
"type": "string", "type": "string",
"description": "A description of the scoring function type. - E.g. Write your custom judge prompt to score the answer." "description": "A description of the scoring function type. - E.g. Write your custom judge prompt to score the answer."
}, },
"supported_purposes": { "supported_dataset_purposes": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string", "type": "string",
@ -8736,7 +8736,7 @@
"required": [ "required": [
"type", "type",
"description", "description",
"supported_purposes" "supported_dataset_purposes"
], ],
"title": "ScoringFnTypeInfo" "title": "ScoringFnTypeInfo"
}, },
@ -10181,7 +10181,46 @@
], ],
"title": "SaveSpansToDatasetRequest" "title": "SaveSpansToDatasetRequest"
}, },
"ScoreRequest": { "ScoreDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"scoring_fn_ids": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"scoring_fn_ids"
],
"title": "ScoreDatasetRequest"
},
"ScoreBatchResponse": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"results": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
}
}
},
"additionalProperties": false,
"required": [
"results"
],
"title": "ScoreBatchResponse"
},
"ScoreRowsRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"dataset_rows": { "dataset_rows": {
@ -10226,7 +10265,7 @@
"dataset_rows", "dataset_rows",
"scoring_fn_ids" "scoring_fn_ids"
], ],
"title": "ScoreRequest" "title": "ScoreRowsRequest"
}, },
"ScoreResponse": { "ScoreResponse": {
"type": "object", "type": "object",
@ -10246,45 +10285,6 @@
"title": "ScoreResponse", "title": "ScoreResponse",
"description": "The response from scoring." "description": "The response from scoring."
}, },
"ScoreDatasetRequest": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"scoring_fn_ids": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"required": [
"dataset_id",
"scoring_fn_ids"
],
"title": "ScoreDatasetRequest"
},
"ScoreBatchResponse": {
"type": "object",
"properties": {
"dataset_id": {
"type": "string"
},
"results": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/ScoringResult"
}
}
},
"additionalProperties": false,
"required": [
"results"
],
"title": "ScoreBatchResponse"
},
"AlgorithmConfig": { "AlgorithmConfig": {
"oneOf": [ "oneOf": [
{ {

View file

@ -659,7 +659,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/EvaluateBenchmarkRequest' $ref: '#/components/schemas/EvaluateBenchmarkRequest'
required: true required: true
/v1/eval/rows: /v1/eval/evaluate_rows:
post: post:
responses: responses:
'200': '200':
@ -2467,36 +2467,6 @@ paths:
schema: schema:
$ref: '#/components/schemas/SaveSpansToDatasetRequest' $ref: '#/components/schemas/SaveSpansToDatasetRequest'
required: true required: true
/v1/scoring/rows:
post:
responses:
'200':
description: >-
ScoreResponse object containing rows and aggregated results
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: Score a list of rows.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreRequest'
required: true
/v1/scoring/jobs: /v1/scoring/jobs:
post: post:
responses: responses:
@ -2526,6 +2496,36 @@ paths:
schema: schema:
$ref: '#/components/schemas/ScoreDatasetRequest' $ref: '#/components/schemas/ScoreDatasetRequest'
required: true required: true
/v1/scoring/score-rows:
post:
responses:
'200':
description: >-
ScoreResponse object containing rows and aggregated results
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: Score a list of rows.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ScoreRowsRequest'
required: true
/v1/post-training/supervised-fine-tune: /v1/post-training/supervised-fine-tune:
post: post:
responses: responses:
@ -6019,7 +6019,7 @@ components:
description: >- description: >-
A description of the scoring function type. - E.g. Write your custom judge A description of the scoring function type. - E.g. Write your custom judge
prompt to score the answer. prompt to score the answer.
supported_purposes: supported_dataset_purposes:
type: array type: array
items: items:
type: string type: string
@ -6039,7 +6039,7 @@ components:
required: required:
- type - type
- description - description
- supported_purposes - supported_dataset_purposes
title: ScoringFnTypeInfo title: ScoringFnTypeInfo
ListScoringFunctionTypesResponse: ListScoringFunctionTypesResponse:
type: object type: object
@ -6982,47 +6982,6 @@ components:
- attributes_to_save - attributes_to_save
- dataset_id - dataset_id
title: SaveSpansToDatasetRequest title: SaveSpansToDatasetRequest
ScoreRequest:
type: object
properties:
dataset_rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows to score.
scoring_fn_ids:
type: array
items:
type: string
description: >-
The scoring function ids to use for the scoring.
additionalProperties: false
required:
- dataset_rows
- scoring_fn_ids
title: ScoreRequest
ScoreResponse:
type: object
properties:
results:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
A map of scoring function name to ScoringResult.
additionalProperties: false
required:
- results
title: ScoreResponse
description: The response from scoring.
ScoreDatasetRequest: ScoreDatasetRequest:
type: object type: object
properties: properties:
@ -7050,6 +7009,47 @@ components:
required: required:
- results - results
title: ScoreBatchResponse title: ScoreBatchResponse
ScoreRowsRequest:
type: object
properties:
dataset_rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: The rows to score.
scoring_fn_ids:
type: array
items:
type: string
description: >-
The scoring function ids to use for the scoring.
additionalProperties: false
required:
- dataset_rows
- scoring_fn_ids
title: ScoreRowsRequest
ScoreResponse:
type: object
properties:
results:
type: object
additionalProperties:
$ref: '#/components/schemas/ScoringResult'
description: >-
A map of scoring function name to ScoringResult.
additionalProperties: false
required:
- results
title: ScoreResponse
description: The response from scoring.
AlgorithmConfig: AlgorithmConfig:
oneOf: oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig' - $ref: '#/components/schemas/LoraFinetuningConfig'

View file

@ -12,16 +12,17 @@ from typing import (
Literal, Literal,
Optional, Optional,
Protocol, Protocol,
Union,
runtime_checkable, runtime_checkable,
Union,
) )
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing_extensions import Annotated from typing_extensions import Annotated
from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.resource import Resource, ResourceType from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from llama_stack.apis.datasets import DatasetPurpose
# Perhaps more structure can be imposed on these functions. Maybe they could be associated # Perhaps more structure can be imposed on these functions. Maybe they could be associated
# with standard metrics so they can be rolled up? # with standard metrics so they can be rolled up?
@ -93,6 +94,7 @@ class RegexParserScoringFnParams(BaseModel):
default_factory=list, default_factory=list,
) )
class CustomLLMAsJudgeScoringFnParams(BaseModel): class CustomLLMAsJudgeScoringFnParams(BaseModel):
type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge" type: Literal["custom_llm_as_judge"] = "custom_llm_as_judge"
judge_model: str judge_model: str
@ -102,6 +104,7 @@ class CustomLLMAsJudgeScoringFnParams(BaseModel):
default_factory=list, default_factory=list,
) )
@json_schema_type @json_schema_type
class RegexParserScoringFn(BaseModel): class RegexParserScoringFn(BaseModel):
type: Literal["regex_parser"] = "regex_parser" type: Literal["regex_parser"] = "regex_parser"
@ -113,36 +116,43 @@ class RegexParserMathScoringFn(BaseModel):
type: Literal["regex_parser_math_response"] = "regex_parser_math_response" type: Literal["regex_parser_math_response"] = "regex_parser_math_response"
regex_parser_math_response: RegexParserScoringFnParams regex_parser_math_response: RegexParserScoringFnParams
@json_schema_type @json_schema_type
class EqualityScoringFn(BaseModel): class EqualityScoringFn(BaseModel):
type: Literal["equality"] = "equality" type: Literal["equality"] = "equality"
equality: BasicScoringFnParams equality: BasicScoringFnParams
@json_schema_type @json_schema_type
class SubsetOfScoringFn(BaseModel): class SubsetOfScoringFn(BaseModel):
type: Literal["subset_of"] = "subset_of" type: Literal["subset_of"] = "subset_of"
subset_of: BasicScoringFnParams subset_of: BasicScoringFnParams
@json_schema_type @json_schema_type
class FactualityScoringFn(BaseModel): class FactualityScoringFn(BaseModel):
type: Literal["factuality"] = "factuality" type: Literal["factuality"] = "factuality"
factuality: BasicScoringFnParams factuality: BasicScoringFnParams
@json_schema_type @json_schema_type
class FaithfulnessScoringFn(BaseModel): class FaithfulnessScoringFn(BaseModel):
type: Literal["faithfulness"] = "faithfulness" type: Literal["faithfulness"] = "faithfulness"
faithfulness: BasicScoringFnParams faithfulness: BasicScoringFnParams
@json_schema_type @json_schema_type
class AnswerCorrectnessScoringFn(BaseModel): class AnswerCorrectnessScoringFn(BaseModel):
type: Literal["answer_correctness"] = "answer_correctness" type: Literal["answer_correctness"] = "answer_correctness"
answer_correctness: BasicScoringFnParams answer_correctness: BasicScoringFnParams
@json_schema_type @json_schema_type
class AnswerRelevancyScoringFn(BaseModel): class AnswerRelevancyScoringFn(BaseModel):
type: Literal["answer_relevancy"] = "answer_relevancy" type: Literal["answer_relevancy"] = "answer_relevancy"
answer_relevancy: BasicScoringFnParams answer_relevancy: BasicScoringFnParams
@json_schema_type @json_schema_type
class AnswerSimilarityScoringFn(BaseModel): class AnswerSimilarityScoringFn(BaseModel):
type: Literal["answer_similarity"] = "answer_similarity" type: Literal["answer_similarity"] = "answer_similarity"
@ -205,9 +215,10 @@ ScoringFnDefinition = register_schema(
class CommonScoringFnFields(BaseModel): class CommonScoringFnFields(BaseModel):
""" """
:param fn: The scoring function type and parameters. :param fn: The scoring function type and parameters.
:param metadata: (Optional) Any additional metadata for this definition (e.g. description). :param metadata: (Optional) Any additional metadata for this definition (e.g. description).
""" """
fn: ScoringFnDefinition fn: ScoringFnDefinition
metadata: Dict[str, Any] = Field( metadata: Dict[str, Any] = Field(
default_factory=dict, default_factory=dict,
@ -217,7 +228,9 @@ class CommonScoringFnFields(BaseModel):
@json_schema_type @json_schema_type
class ScoringFn(CommonScoringFnFields, Resource): class ScoringFn(CommonScoringFnFields, Resource):
type: Literal[ResourceType.scoring_function.value] = ResourceType.scoring_function.value type: Literal[ResourceType.scoring_function.value] = (
ResourceType.scoring_function.value
)
@property @property
def scoring_fn_id(self) -> str: def scoring_fn_id(self) -> str:
@ -231,14 +244,15 @@ class ScoringFn(CommonScoringFnFields, Resource):
@json_schema_type @json_schema_type
class ScoringFnTypeInfo(BaseModel): class ScoringFnTypeInfo(BaseModel):
""" """
:param type: The type of scoring function. :param type: The type of scoring function.
:param description: A description of the scoring function type. :param description: A description of the scoring function type.
- E.g. Write your custom judge prompt to score the answer. - E.g. Write your custom judge prompt to score the answer.
:param supported_purposes: The purposes that this scoring function can be used for. :param supported_dataset_purposes: The purposes that this scoring function can be used for.
""" """
type: ScoringFunctionType type: ScoringFunctionType
description: str description: str
supported_purposes: List[DatasetPurpose] = Field( supported_dataset_purposes: List[DatasetPurpose] = Field(
description="The supported purposes (supported dataset schema) that this scoring function can be used for. E.g. eval/question-answer", description="The supported purposes (supported dataset schema) that this scoring function can be used for. E.g. eval/question-answer",
default_factory=list, default_factory=list,
) )
@ -261,16 +275,16 @@ class ListScoringFunctionTypesResponse(BaseModel):
@runtime_checkable @runtime_checkable
class ScoringFunctions(Protocol): class ScoringFunctions(Protocol):
@webmethod(route="/scoring-functions", method="GET") @webmethod(route="/scoring-functions", method="GET")
async def list_scoring_functions(self) -> ListScoringFunctionsResponse: async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
""" """
List all registered scoring functions. List all registered scoring functions.
""" """
... ...
@webmethod(route="/scoring-functions/types", method="GET") @webmethod(route="/scoring-functions/types", method="GET")
async def list_scoring_function_types(self) -> ListScoringFunctionTypesResponse: async def list_scoring_function_types(self) -> ListScoringFunctionTypesResponse:
""" """
List all available scoring function types information and how to use them. List all available scoring function types information and how to use them.
""" """
... ...
@ -278,7 +292,7 @@ class ScoringFunctions(Protocol):
async def get_scoring_function( async def get_scoring_function(
self, self,
scoring_fn_id: str, scoring_fn_id: str,
) -> Optional[ScoringFn]: ) -> Optional[ScoringFn]:
""" """
Get a scoring function by its ID. Get a scoring function by its ID.
:param scoring_fn_id: The ID of the scoring function to get. :param scoring_fn_id: The ID of the scoring function to get.
@ -302,12 +316,12 @@ class ScoringFunctions(Protocol):
- E.g. {"description": "This scoring function is used for ..."} - E.g. {"description": "This scoring function is used for ..."}
""" """
... ...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE") @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
async def unregister_scoring_function( async def unregister_scoring_function(
self, self,
scoring_fn_id: str, scoring_fn_id: str,
) -> None: ) -> None:
""" """
Unregister a scoring function by its ID. Unregister a scoring function by its ID.
:param scoring_fn_id: The ID of the scoring function to unregister. :param scoring_fn_id: The ID of the scoring function to unregister.