feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from @webmethod decorators to FastAPI router pattern, matching the Batches API structure.

One notable change is the update of stack.py to handle request models in register_resources().

Closes: #4308

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-12-17 06:02:36 +00:00 · 2025-12-10 15:04:27 +01:00 · 2025-12-10 15:04:27 +01:00 · ff375f1abb
commit ff375f1abb
parent 661985e240
18 changed files with 862 additions and 195 deletions
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -9166,20 +9166,23 @@ components:
          type: string
          const: benchmark
          title: Type
+          description: The resource type, always benchmark.
          default: benchmark
        dataset_id:
          type: string
          title: Dataset Id
+          description: Identifier of the dataset to use for the benchmark evaluation.
        scoring_functions:
          items:
            type: string
          type: array
          title: Scoring Functions
+          description: List of scoring function identifiers to apply during evaluation.
        metadata:
          additionalProperties: true
          type: object
          title: Metadata
-          description: Metadata for this evaluation task
+          description: Metadata for this evaluation task.
      type: object
      required:
      - identifier
@@ -9195,10 +9198,12 @@ components:
            $ref: '#/components/schemas/Benchmark'
          type: array
          title: Data
+          description: List of benchmark objects.
      type: object
      required:
      - data
      title: ListBenchmarksResponse
+      description: Response containing a list of benchmark objects.
    BenchmarkConfig:
      properties:
        eval_candidate:
@@ -9848,6 +9853,45 @@ components:
      - $ref: '#/components/schemas/RowsDataSource'
        title: RowsDataSource
      title: URIDataSource | RowsDataSource
+    RegisterBenchmarkRequest:
+      properties:
+        benchmark_id:
+          type: string
+          title: Benchmark Id
+          description: The ID of the benchmark to register.
+        dataset_id:
+          type: string
+          title: Dataset Id
+          description: The ID of the dataset to use for the benchmark.
+        scoring_functions:
+          items:
+            type: string
+          type: array
+          title: Scoring Functions
+          description: The scoring functions to use for the benchmark.
+        provider_benchmark_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: The ID of the provider benchmark to use for the benchmark.
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+          description: The ID of the provider to use for the benchmark.
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+          description: The metadata to use for the benchmark.
+      type: object
+      required:
+      - benchmark_id
+      - dataset_id
+      - scoring_functions
+      title: RegisterBenchmarkRequest
+      description: Request model for registering a benchmark.
    AllowedToolsFilter:
      properties:
        tool_names:
@@ -11053,6 +11097,33 @@ components:
      - batch_id
      title: CancelBatchRequest
      type: object
+    ListBenchmarksRequest:
+      description: Request model for listing benchmarks.
+      properties: {}
+      title: ListBenchmarksRequest
+      type: object
+    GetBenchmarkRequest:
+      description: Request model for getting a benchmark.
+      properties:
+        benchmark_id:
+          description: The ID of the benchmark to get.
+          title: Benchmark Id
+          type: string
+      required:
+      - benchmark_id
+      title: GetBenchmarkRequest
+      type: object
+    UnregisterBenchmarkRequest:
+      description: Request model for unregistering a benchmark.
+      properties:
+        benchmark_id:
+          description: The ID of the benchmark to unregister.
+          title: Benchmark Id
+          type: string
+      required:
+      - benchmark_id
+      title: UnregisterBenchmarkRequest
+      type: object
    DialogType:
      description: Parameter type for dialog data with semantic output labels.
      properties: