forked from phoenix-oss/llama-stack-mirror
update
This commit is contained in:
parent
398319fe7a
commit
5e817cd56a
3 changed files with 187 additions and 187 deletions
214
docs/_static/llama-stack-spec.html
vendored
214
docs/_static/llama-stack-spec.html
vendored
|
@ -2035,6 +2035,49 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/evaluation/grading": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The evaluation job containing grader scores.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluationJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Evaluation"
|
||||
],
|
||||
"description": "Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/GradeRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/evaluation/grade_sync": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -2078,49 +2121,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/evaluation/grading": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The evaluation job containing grader scores.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluationJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Evaluation"
|
||||
],
|
||||
"description": "Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/GradingRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/health": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -8615,7 +8615,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"GradeSyncRequest": {
|
||||
"GradeRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"task": {
|
||||
|
@ -8627,69 +8627,7 @@
|
|||
"required": [
|
||||
"task"
|
||||
],
|
||||
"title": "GradeSyncRequest"
|
||||
},
|
||||
"EvaluationResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"generations": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The generations in rows for the evaluation."
|
||||
},
|
||||
"scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
},
|
||||
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"generations",
|
||||
"scores"
|
||||
],
|
||||
"title": "EvaluationResponse",
|
||||
"description": "A response to an inline evaluation."
|
||||
},
|
||||
"GradingRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"task": {
|
||||
"$ref": "#/components/schemas/EvaluationTask",
|
||||
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"task"
|
||||
],
|
||||
"title": "GradingRequest"
|
||||
"title": "GradeRequest"
|
||||
},
|
||||
"EvaluationCandidate": {
|
||||
"oneOf": [
|
||||
|
@ -8763,6 +8701,68 @@
|
|||
],
|
||||
"title": "EvaluationJob"
|
||||
},
|
||||
"GradeSyncRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"task": {
|
||||
"$ref": "#/components/schemas/EvaluationTask",
|
||||
"description": "The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation task against a benchmark_id - DatasetEvaluationTask: Run evaluation task against a dataset_id and a list of grader_ids - DataEvaluationTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"task"
|
||||
],
|
||||
"title": "GradeSyncRequest"
|
||||
},
|
||||
"EvaluationResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"generations": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The generations in rows for the evaluation."
|
||||
},
|
||||
"scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
},
|
||||
"description": "The scores for the evaluation. Map of grader id to ScoringResult."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"generations",
|
||||
"scores"
|
||||
],
|
||||
"title": "EvaluationResponse",
|
||||
"description": "A response to an inline evaluation."
|
||||
},
|
||||
"HealthInfo": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
156
docs/_static/llama-stack-spec.yaml
vendored
156
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1385,6 +1385,38 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/evaluation/grading:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
The evaluation job containing grader scores.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluationJob'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Evaluation
|
||||
description: >-
|
||||
Schedule a grading job, by grading generated (model or agent) results. The
|
||||
generated results are expected to be in the dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GradeRequest'
|
||||
required: true
|
||||
/v1/evaluation/grade_sync:
|
||||
post:
|
||||
responses:
|
||||
|
@ -1420,38 +1452,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/GradeSyncRequest'
|
||||
required: true
|
||||
/v1/evaluation/grading:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
The evaluation job containing grader scores.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluationJob'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Evaluation
|
||||
description: >-
|
||||
Schedule a grading job, by grading generated results. The generated results
|
||||
are expected to be in the dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/GradingRequest'
|
||||
required: true
|
||||
/v1/health:
|
||||
get:
|
||||
responses:
|
||||
|
@ -5966,7 +5966,7 @@ components:
|
|||
benchmark: '#/components/schemas/BenchmarkEvaluationTask'
|
||||
dataset: '#/components/schemas/DatasetEvaluationTask'
|
||||
data: '#/components/schemas/DataEvaluationTask'
|
||||
GradeSyncRequest:
|
||||
GradeRequest:
|
||||
type: object
|
||||
properties:
|
||||
task:
|
||||
|
@ -5980,51 +5980,7 @@ components:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- task
|
||||
title: GradeSyncRequest
|
||||
EvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
generations:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The generations in rows for the evaluation.
|
||||
scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
The scores for the evaluation. Map of grader id to ScoringResult.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
title: EvaluationResponse
|
||||
description: A response to an inline evaluation.
|
||||
GradingRequest:
|
||||
type: object
|
||||
properties:
|
||||
task:
|
||||
$ref: '#/components/schemas/EvaluationTask'
|
||||
description: >-
|
||||
The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
|
||||
task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
|
||||
against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
|
||||
evaluation task against a data source (e.g. rows, uri, etc.) and a list
|
||||
of grader_ids
|
||||
additionalProperties: false
|
||||
required:
|
||||
- task
|
||||
title: GradingRequest
|
||||
title: GradeRequest
|
||||
EvaluationCandidate:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
|
@ -6078,6 +6034,50 @@ components:
|
|||
- task
|
||||
- candidate
|
||||
title: EvaluationJob
|
||||
GradeSyncRequest:
|
||||
type: object
|
||||
properties:
|
||||
task:
|
||||
$ref: '#/components/schemas/EvaluationTask'
|
||||
description: >-
|
||||
The task to evaluate. One of: - BenchmarkEvaluationTask: Run evaluation
|
||||
task against a benchmark_id - DatasetEvaluationTask: Run evaluation task
|
||||
against a dataset_id and a list of grader_ids - DataEvaluationTask: Run
|
||||
evaluation task against a data source (e.g. rows, uri, etc.) and a list
|
||||
of grader_ids
|
||||
additionalProperties: false
|
||||
required:
|
||||
- task
|
||||
title: GradeSyncRequest
|
||||
EvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
generations:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The generations in rows for the evaluation.
|
||||
scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
The scores for the evaluation. Map of grader id to ScoringResult.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
title: EvaluationResponse
|
||||
description: A response to an inline evaluation.
|
||||
HealthInfo:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
@ -148,9 +148,9 @@ class Evaluation(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(route="/evaluation/grading", method="POST")
|
||||
async def grading(self, task: EvaluationTask) -> EvaluationJob:
|
||||
async def grade(self, task: EvaluationTask) -> EvaluationJob:
|
||||
"""
|
||||
Schedule a grading job, by grading generated results. The generated results are expected to be in the dataset.
|
||||
Schedule a grading job, by grading generated (model or agent) results. The generated results are expected to be in the dataset.
|
||||
|
||||
:param task: The task to evaluate. One of:
|
||||
- BenchmarkEvaluationTask: Run evaluation task against a benchmark_id
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue