forked from phoenix-oss/llama-stack-mirror
update EvaluationTask
This commit is contained in:
parent
f107e3229b
commit
d994499f09
3 changed files with 32 additions and 5 deletions
15
docs/_static/llama-stack-spec.html
vendored
15
docs/_static/llama-stack-spec.html
vendored
|
@ -8527,23 +8527,28 @@
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"benchmark_id": {
|
"benchmark_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The benchmark ID to evaluate."
|
||||||
},
|
},
|
||||||
"dataset_id": {
|
"dataset_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The dataset ID to evaluate."
|
||||||
},
|
},
|
||||||
"data_source": {
|
"data_source": {
|
||||||
"$ref": "#/components/schemas/DataSource"
|
"$ref": "#/components/schemas/DataSource",
|
||||||
|
"description": "The data source to evaluate."
|
||||||
},
|
},
|
||||||
"grader_ids": {
|
"grader_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The IDs of the graders to run the evaluation with."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"title": "EvaluationTask"
|
"title": "EvaluationTask",
|
||||||
|
"description": "A task for evaluation. To specify a task, one of the following must be provided:\n- `benchmark_id`: Run evaluation task against a benchmark_id\n- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids\n- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
|
||||||
},
|
},
|
||||||
"GradeRequest": {
|
"GradeRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
10
docs/_static/llama-stack-spec.yaml
vendored
10
docs/_static/llama-stack-spec.yaml
vendored
|
@ -5908,16 +5908,26 @@ components:
|
||||||
properties:
|
properties:
|
||||||
benchmark_id:
|
benchmark_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The benchmark ID to evaluate.
|
||||||
dataset_id:
|
dataset_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The dataset ID to evaluate.
|
||||||
data_source:
|
data_source:
|
||||||
$ref: '#/components/schemas/DataSource'
|
$ref: '#/components/schemas/DataSource'
|
||||||
|
description: The data source to evaluate.
|
||||||
grader_ids:
|
grader_ids:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: The IDs of the graders to run the evaluation with.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
title: EvaluationTask
|
title: EvaluationTask
|
||||||
|
description: >-
|
||||||
|
A task for evaluation. To specify a task, one of the following must be provided:
|
||||||
|
- `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id`
|
||||||
|
and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
|
||||||
|
- `data_source` and `grader_ids`: Run evaluation task against a data source
|
||||||
|
(e.g. rows, uri, etc.) and a list of grader_ids
|
||||||
GradeRequest:
|
GradeRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -49,6 +49,18 @@ EvaluationCandidate = register_schema(
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EvaluationTask(BaseModel):
|
class EvaluationTask(BaseModel):
|
||||||
|
"""
|
||||||
|
A task for evaluation. To specify a task, one of the following must be provided:
|
||||||
|
- `benchmark_id`: Run evaluation task against a benchmark_id
|
||||||
|
- `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids
|
||||||
|
- `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids
|
||||||
|
|
||||||
|
:param benchmark_id: The benchmark ID to evaluate.
|
||||||
|
:param dataset_id: The dataset ID to evaluate.
|
||||||
|
:param data_source: The data source to evaluate.
|
||||||
|
:param grader_ids: The IDs of the graders to run the evaluation with.
|
||||||
|
"""
|
||||||
|
|
||||||
benchmark_id: Optional[str] = None
|
benchmark_id: Optional[str] = None
|
||||||
dataset_id: Optional[str] = None
|
dataset_id: Optional[str] = None
|
||||||
data_source: Optional[DataSource] = None
|
data_source: Optional[DataSource] = None
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue