feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from @webmethod decorators to the FastAPI router
pattern, matching the Batches API structure.

One notable change is that stack.py now handles request models in
register_resources().

Closes: #4308 

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] |
select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-12-10 15:04:27 +01:00 committed by GitHub
parent 661985e240
commit ff375f1abb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 862 additions and 195 deletions

View file

@ -794,7 +794,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: List Benchmarks
summary: List all benchmarks.
description: List all benchmarks.
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
post:
@ -812,10 +812,10 @@ paths:
description: Default Response
$ref: '#/components/responses/DefaultError'
'204':
description: Successful Response
description: The benchmark was successfully registered.
tags:
- Benchmarks
summary: Register Benchmark
summary: Register a benchmark.
description: Register a benchmark.
operationId: register_benchmark_v1alpha_eval_benchmarks_post
requestBody:
@ -835,20 +835,20 @@ paths:
schema:
$ref: '#/components/schemas/Benchmark'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Benchmarks
summary: Get Benchmark
summary: Get a benchmark by its ID.
description: Get a benchmark by its ID.
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
parameters:
@ -857,26 +857,28 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: benchmark_id'
description: The ID of the benchmark to get.
title: Benchmark Id
description: The ID of the benchmark to get.
delete:
responses:
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
'204':
description: Successful Response
description: The benchmark was successfully unregistered.
tags:
- Benchmarks
summary: Unregister Benchmark
summary: Unregister a benchmark.
description: Unregister a benchmark.
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
parameters:
@ -885,7 +887,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: benchmark_id'
description: The ID of the benchmark to unregister.
title: Benchmark Id
description: The ID of the benchmark to unregister.
deprecated: true
components:
schemas:
@ -7375,20 +7379,23 @@ components:
type: string
const: benchmark
title: Type
description: The resource type, always benchmark.
default: benchmark
dataset_id:
type: string
title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata:
additionalProperties: true
type: object
title: Metadata
description: Metadata for this evaluation task
description: Metadata for this evaluation task.
type: object
required:
- identifier
@ -7404,10 +7411,12 @@ components:
$ref: '#/components/schemas/Benchmark'
type: array
title: Data
description: List of benchmark objects.
type: object
required:
- data
title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig:
properties:
eval_candidate:
@ -8369,33 +8378,40 @@ components:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter:
properties:
tool_names:
@ -9601,6 +9617,33 @@ components:
- batch_id
title: CancelBatchRequest
type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType:
description: Parameter type for dialog data with semantic output labels.
properties:

View file

@ -188,7 +188,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: List Benchmarks
summary: List all benchmarks.
description: List all benchmarks.
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
/v1alpha/eval/benchmarks/{benchmark_id}:
@ -201,20 +201,20 @@ paths:
schema:
$ref: '#/components/schemas/Benchmark'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Benchmarks
summary: Get Benchmark
summary: Get a benchmark by its ID.
description: Get a benchmark by its ID.
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
parameters:
@ -223,7 +223,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: benchmark_id'
description: The ID of the benchmark to get.
title: Benchmark Id
description: The ID of the benchmark to get.
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post:
responses:
@ -6517,20 +6519,23 @@ components:
type: string
const: benchmark
title: Type
description: The resource type, always benchmark.
default: benchmark
dataset_id:
type: string
title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata:
additionalProperties: true
type: object
title: Metadata
description: Metadata for this evaluation task
description: Metadata for this evaluation task.
type: object
required:
- identifier
@ -6546,10 +6551,12 @@ components:
$ref: '#/components/schemas/Benchmark'
type: array
title: Data
description: List of benchmark objects.
type: object
required:
- data
title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig:
properties:
eval_candidate:
@ -7346,6 +7353,45 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterBenchmarkRequest:
properties:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter:
properties:
tool_names:
@ -8395,6 +8441,33 @@ components:
- batch_id
title: CancelBatchRequest
type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType:
description: Parameter type for dialog data with semantic output labels.
properties:

View file

@ -9166,20 +9166,23 @@ components:
type: string
const: benchmark
title: Type
description: The resource type, always benchmark.
default: benchmark
dataset_id:
type: string
title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata:
additionalProperties: true
type: object
title: Metadata
description: Metadata for this evaluation task
description: Metadata for this evaluation task.
type: object
required:
- identifier
@ -9195,10 +9198,12 @@ components:
$ref: '#/components/schemas/Benchmark'
type: array
title: Data
description: List of benchmark objects.
type: object
required:
- data
title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig:
properties:
eval_candidate:
@ -9848,6 +9853,45 @@ components:
- $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource
title: URIDataSource | RowsDataSource
RegisterBenchmarkRequest:
properties:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter:
properties:
tool_names:
@ -11053,6 +11097,33 @@ components:
- batch_id
title: CancelBatchRequest
type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType:
description: Parameter type for dialog data with semantic output labels.
properties:

View file

@ -3404,7 +3404,7 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
summary: List Benchmarks
summary: List all benchmarks.
description: List all benchmarks.
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
post:
@ -3422,10 +3422,10 @@ paths:
description: Default Response
$ref: '#/components/responses/DefaultError'
'204':
description: Successful Response
description: The benchmark was successfully registered.
tags:
- Benchmarks
summary: Register Benchmark
summary: Register a benchmark.
description: Register a benchmark.
operationId: register_benchmark_v1alpha_eval_benchmarks_post
requestBody:
@ -3445,20 +3445,20 @@ paths:
schema:
$ref: '#/components/schemas/Benchmark'
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
tags:
- Benchmarks
summary: Get Benchmark
summary: Get a benchmark by its ID.
description: Get a benchmark by its ID.
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
parameters:
@ -3467,26 +3467,28 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: benchmark_id'
description: The ID of the benchmark to get.
title: Benchmark Id
description: The ID of the benchmark to get.
delete:
responses:
'400':
description: Bad Request
$ref: '#/components/responses/BadRequest400'
description: Bad Request
'429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default:
description: Default Response
$ref: '#/components/responses/DefaultError'
description: Default Response
'204':
description: Successful Response
description: The benchmark was successfully unregistered.
tags:
- Benchmarks
summary: Unregister Benchmark
summary: Unregister a benchmark.
description: Unregister a benchmark.
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
parameters:
@ -3495,7 +3497,9 @@ paths:
required: true
schema:
type: string
description: 'Path parameter: benchmark_id'
description: The ID of the benchmark to unregister.
title: Benchmark Id
description: The ID of the benchmark to unregister.
deprecated: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post:
@ -10391,20 +10395,23 @@ components:
type: string
const: benchmark
title: Type
description: The resource type, always benchmark.
default: benchmark
dataset_id:
type: string
title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata:
additionalProperties: true
type: object
title: Metadata
description: Metadata for this evaluation task
description: Metadata for this evaluation task.
type: object
required:
- identifier
@ -10420,10 +10427,12 @@ components:
$ref: '#/components/schemas/Benchmark'
type: array
title: Data
description: List of benchmark objects.
type: object
required:
- data
title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig:
properties:
eval_candidate:
@ -11385,33 +11394,40 @@ components:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter:
properties:
tool_names:
@ -12617,6 +12633,33 @@ components:
- batch_id
title: CancelBatchRequest
type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType:
description: Parameter type for dialog data with semantic output labels.
properties: