mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-02 16:24:32 +00:00
Merge branch 'eval_api_final' into delete_eval_scoring_scoring_fn
This commit is contained in:
commit
e23531c9d0
4 changed files with 50 additions and 38 deletions
26
docs/_static/llama-stack-spec.yaml
vendored
26
docs/_static/llama-stack-spec.yaml
vendored
|
|
@ -5328,7 +5328,7 @@ components:
|
|||
EvaluationResponse:
|
||||
type: object
|
||||
properties:
|
||||
generations:
|
||||
result_rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
|
|
@ -5341,17 +5341,22 @@ components:
|
|||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The generations in rows for the evaluation.
|
||||
scores:
|
||||
The result data containing inputs, generations and grades in each row.
|
||||
grades:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
The scores for the evaluation. Map of grader id to ScoringResult.
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Map of grader id to aggregated value.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
- result_rows
|
||||
- grades
|
||||
title: EvaluationResponse
|
||||
description: A response to an inline evaluation.
|
||||
ScoringResult:
|
||||
|
|
@ -6404,13 +6409,14 @@ components:
|
|||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to be used to run the benchmark.
|
||||
The ID of the dataset to be used to run the benchmark. ID obtained through
|
||||
`datasets.register()`
|
||||
grader_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
List of grader ids to use for this benchmark.
|
||||
List of grader ids to use for this benchmark. ID obtained through `graders.register()`
|
||||
benchmark_id:
|
||||
type: string
|
||||
description: >-
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue