feat: convert Benchmarks API to use FastAPI router (#4309)

# What does this PR do?

Convert the Benchmarks API from `@webmethod` decorators to the FastAPI router
pattern, matching the Batches API structure.

One notable change is an update to `stack.py` so that `register_resources()`
can handle list methods that take request models.
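
For reference, a condensed sketch of the new router pattern (trimmed from the
`fastapi_routes.py` module added in this PR; the generated query/path dependency
helpers are replaced with a bare `Depends()` and the get/unregister endpoints are
omitted for brevity):

```python
# Condensed sketch of llama_stack_api/benchmarks/fastapi_routes.py (this PR).
from typing import Annotated

from fastapi import APIRouter, Body, Depends

from llama_stack_api import (
    Benchmarks,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
)
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA


def create_router(impl: Benchmarks) -> APIRouter:
    router = APIRouter(prefix=f"/{LLAMA_STACK_API_V1ALPHA}", tags=["Benchmarks"])

    @router.get("/eval/benchmarks", response_model=ListBenchmarksResponse)
    async def list_benchmarks(
        request: Annotated[ListBenchmarksRequest, Depends()],
    ) -> ListBenchmarksResponse:
        # Query parameters are collected into a request model and handed to the impl.
        return await impl.list_benchmarks(request)

    @router.post("/eval/benchmarks", deprecated=True)
    async def register_benchmark(
        request: Annotated[RegisterBenchmarkRequest, Body(...)],
    ) -> None:
        # The JSON body is parsed into a single request model instead of loose kwargs.
        return await impl.register_benchmark(request)

    return router
```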

Closes: #4308 

## Test Plan

CI and `curl http://localhost:8321/v1/inspect/routes | jq '.data[] |
select(.route | contains("benchmark"))'`
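
The same check can also be scripted; a minimal sketch, assuming a stack running on
the default local port and the route payload shape targeted by the `jq` filter above:

```python
# Sketch: list registered routes and keep those mentioning "benchmark"
# (assumes a stack serving on localhost:8321).
import httpx

resp = httpx.get("http://localhost:8321/v1/inspect/routes")
resp.raise_for_status()

benchmark_routes = [r for r in resp.json()["data"] if "benchmark" in r["route"]]
assert benchmark_routes, "expected benchmark routes to be registered"
for route in benchmark_routes:
    print(route)
```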

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Sébastien Han 2025-12-10 15:04:27 +01:00 committed by GitHub
parent 661985e240
commit ff375f1abb
18 changed files with 862 additions and 195 deletions

View file

@@ -3404,7 +3404,7 @@ paths:
  $ref: '#/components/responses/DefaultError'
  tags:
  - Benchmarks
- summary: List Benchmarks
+ summary: List all benchmarks.
  description: List all benchmarks.
  operationId: list_benchmarks_v1alpha_eval_benchmarks_get
  post:
@@ -3422,10 +3422,10 @@ paths:
  description: Default Response
  $ref: '#/components/responses/DefaultError'
  '204':
- description: Successful Response
+ description: The benchmark was successfully registered.
  tags:
  - Benchmarks
- summary: Register Benchmark
+ summary: Register a benchmark.
  description: Register a benchmark.
  operationId: register_benchmark_v1alpha_eval_benchmarks_post
  requestBody:
@@ -3445,20 +3445,20 @@ paths:
  schema:
  $ref: '#/components/schemas/Benchmark'
  '400':
- description: Bad Request
  $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
  '429':
- description: Too Many Requests
  $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
  '500':
- description: Internal Server Error
  $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
  default:
- description: Default Response
  $ref: '#/components/responses/DefaultError'
+ description: Default Response
  tags:
  - Benchmarks
- summary: Get Benchmark
+ summary: Get a benchmark by its ID.
  description: Get a benchmark by its ID.
  operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
  parameters:
@@ -3467,26 +3467,28 @@ paths:
  required: true
  schema:
  type: string
- description: 'Path parameter: benchmark_id'
+ description: The ID of the benchmark to get.
+ title: Benchmark Id
+ description: The ID of the benchmark to get.
  delete:
  responses:
  '400':
- description: Bad Request
  $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
  '429':
- description: Too Many Requests
  $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
  '500':
- description: Internal Server Error
  $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
  default:
- description: Default Response
  $ref: '#/components/responses/DefaultError'
+ description: Default Response
  '204':
- description: Successful Response
+ description: The benchmark was successfully unregistered.
  tags:
  - Benchmarks
- summary: Unregister Benchmark
+ summary: Unregister a benchmark.
  description: Unregister a benchmark.
  operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
  parameters:
@@ -3495,7 +3497,9 @@ paths:
  required: true
  schema:
  type: string
- description: 'Path parameter: benchmark_id'
+ description: The ID of the benchmark to unregister.
+ title: Benchmark Id
+ description: The ID of the benchmark to unregister.
  deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
  post:
@@ -10391,20 +10395,23 @@ components:
  type: string
  const: benchmark
  title: Type
+ description: The resource type, always benchmark.
  default: benchmark
  dataset_id:
  type: string
  title: Dataset Id
+ description: Identifier of the dataset to use for the benchmark evaluation.
  scoring_functions:
  items:
  type: string
  type: array
  title: Scoring Functions
+ description: List of scoring function identifiers to apply during evaluation.
  metadata:
  additionalProperties: true
  type: object
  title: Metadata
- description: Metadata for this evaluation task
+ description: Metadata for this evaluation task.
  type: object
  required:
  - identifier
@@ -10420,10 +10427,12 @@ components:
  $ref: '#/components/schemas/Benchmark'
  type: array
  title: Data
+ description: List of benchmark objects.
  type: object
  required:
  - data
  title: ListBenchmarksResponse
+ description: Response containing a list of benchmark objects.
  BenchmarkConfig:
  properties:
  eval_candidate:
@@ -11385,33 +11394,40 @@ components:
  benchmark_id:
  type: string
  title: Benchmark Id
+ description: The ID of the benchmark to register.
  dataset_id:
  type: string
  title: Dataset Id
+ description: The ID of the dataset to use for the benchmark.
  scoring_functions:
  items:
  type: string
  type: array
  title: Scoring Functions
+ description: The scoring functions to use for the benchmark.
  provider_benchmark_id:
  anyOf:
  - type: string
  - type: 'null'
+ description: The ID of the provider benchmark to use for the benchmark.
  provider_id:
  anyOf:
  - type: string
  - type: 'null'
+ description: The ID of the provider to use for the benchmark.
  metadata:
  anyOf:
  - additionalProperties: true
  type: object
  - type: 'null'
+ description: The metadata to use for the benchmark.
  type: object
  required:
  - benchmark_id
  - dataset_id
  - scoring_functions
  title: RegisterBenchmarkRequest
+ description: Request model for registering a benchmark.
  AllowedToolsFilter:
  properties:
  tool_names:
@@ -12617,6 +12633,33 @@ components:
  - batch_id
  title: CancelBatchRequest
  type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
  DialogType:
  description: Parameter type for dialog data with semantic output labels.
  properties:

View file

@ -794,7 +794,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Benchmarks - Benchmarks
summary: List Benchmarks summary: List all benchmarks.
description: List all benchmarks. description: List all benchmarks.
operationId: list_benchmarks_v1alpha_eval_benchmarks_get operationId: list_benchmarks_v1alpha_eval_benchmarks_get
post: post:
@ -812,10 +812,10 @@ paths:
description: Default Response description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
'204': '204':
description: Successful Response description: The benchmark was successfully registered.
tags: tags:
- Benchmarks - Benchmarks
summary: Register Benchmark summary: Register a benchmark.
description: Register a benchmark. description: Register a benchmark.
operationId: register_benchmark_v1alpha_eval_benchmarks_post operationId: register_benchmark_v1alpha_eval_benchmarks_post
requestBody: requestBody:
@ -835,20 +835,20 @@ paths:
schema: schema:
$ref: '#/components/schemas/Benchmark' $ref: '#/components/schemas/Benchmark'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Benchmarks - Benchmarks
summary: Get Benchmark summary: Get a benchmark by its ID.
description: Get a benchmark by its ID. description: Get a benchmark by its ID.
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
parameters: parameters:
@ -857,26 +857,28 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: benchmark_id' description: The ID of the benchmark to get.
title: Benchmark Id
description: The ID of the benchmark to get.
delete: delete:
responses: responses:
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
'204': '204':
description: Successful Response description: The benchmark was successfully unregistered.
tags: tags:
- Benchmarks - Benchmarks
summary: Unregister Benchmark summary: Unregister a benchmark.
description: Unregister a benchmark. description: Unregister a benchmark.
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
parameters: parameters:
@ -885,7 +887,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: benchmark_id' description: The ID of the benchmark to unregister.
title: Benchmark Id
description: The ID of the benchmark to unregister.
deprecated: true deprecated: true
components: components:
schemas: schemas:
@ -7375,20 +7379,23 @@ components:
type: string type: string
const: benchmark const: benchmark
title: Type title: Type
description: The resource type, always benchmark.
default: benchmark default: benchmark
dataset_id: dataset_id:
type: string type: string
title: Dataset Id title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions: scoring_functions:
items: items:
type: string type: string
type: array type: array
title: Scoring Functions title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata: metadata:
additionalProperties: true additionalProperties: true
type: object type: object
title: Metadata title: Metadata
description: Metadata for this evaluation task description: Metadata for this evaluation task.
type: object type: object
required: required:
- identifier - identifier
@ -7404,10 +7411,12 @@ components:
$ref: '#/components/schemas/Benchmark' $ref: '#/components/schemas/Benchmark'
type: array type: array
title: Data title: Data
description: List of benchmark objects.
type: object type: object
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig: BenchmarkConfig:
properties: properties:
eval_candidate: eval_candidate:
@ -8369,33 +8378,40 @@ components:
benchmark_id: benchmark_id:
type: string type: string
title: Benchmark Id title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id: dataset_id:
type: string type: string
title: Dataset Id title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions: scoring_functions:
items: items:
type: string type: string
type: array type: array
title: Scoring Functions title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id: provider_benchmark_id:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id: provider_id:
anyOf: anyOf:
- type: string - type: string
- type: 'null' - type: 'null'
description: The ID of the provider to use for the benchmark.
metadata: metadata:
anyOf: anyOf:
- additionalProperties: true - additionalProperties: true
type: object type: object
- type: 'null' - type: 'null'
description: The metadata to use for the benchmark.
type: object type: object
required: required:
- benchmark_id - benchmark_id
- dataset_id - dataset_id
- scoring_functions - scoring_functions
title: RegisterBenchmarkRequest title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter: AllowedToolsFilter:
properties: properties:
tool_names: tool_names:
@ -9601,6 +9617,33 @@ components:
- batch_id - batch_id
title: CancelBatchRequest title: CancelBatchRequest
type: object type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType: DialogType:
description: Parameter type for dialog data with semantic output labels. description: Parameter type for dialog data with semantic output labels.
properties: properties:

View file

@ -188,7 +188,7 @@ paths:
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- Benchmarks - Benchmarks
summary: List Benchmarks summary: List all benchmarks.
description: List all benchmarks. description: List all benchmarks.
operationId: list_benchmarks_v1alpha_eval_benchmarks_get operationId: list_benchmarks_v1alpha_eval_benchmarks_get
/v1alpha/eval/benchmarks/{benchmark_id}: /v1alpha/eval/benchmarks/{benchmark_id}:
@ -201,20 +201,20 @@ paths:
schema: schema:
$ref: '#/components/schemas/Benchmark' $ref: '#/components/schemas/Benchmark'
'400': '400':
description: Bad Request
$ref: '#/components/responses/BadRequest400' $ref: '#/components/responses/BadRequest400'
description: Bad Request
'429': '429':
description: Too Many Requests
$ref: '#/components/responses/TooManyRequests429' $ref: '#/components/responses/TooManyRequests429'
description: Too Many Requests
'500': '500':
description: Internal Server Error
$ref: '#/components/responses/InternalServerError500' $ref: '#/components/responses/InternalServerError500'
description: Internal Server Error
default: default:
description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
description: Default Response
tags: tags:
- Benchmarks - Benchmarks
summary: Get Benchmark summary: Get a benchmark by its ID.
description: Get a benchmark by its ID. description: Get a benchmark by its ID.
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
parameters: parameters:
@ -223,7 +223,9 @@ paths:
required: true required: true
schema: schema:
type: string type: string
description: 'Path parameter: benchmark_id' description: The ID of the benchmark to get.
title: Benchmark Id
description: The ID of the benchmark to get.
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations: /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post: post:
responses: responses:
@ -6517,20 +6519,23 @@ components:
type: string type: string
const: benchmark const: benchmark
title: Type title: Type
description: The resource type, always benchmark.
default: benchmark default: benchmark
dataset_id: dataset_id:
type: string type: string
title: Dataset Id title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions: scoring_functions:
items: items:
type: string type: string
type: array type: array
title: Scoring Functions title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata: metadata:
additionalProperties: true additionalProperties: true
type: object type: object
title: Metadata title: Metadata
description: Metadata for this evaluation task description: Metadata for this evaluation task.
type: object type: object
required: required:
- identifier - identifier
@ -6546,10 +6551,12 @@ components:
$ref: '#/components/schemas/Benchmark' $ref: '#/components/schemas/Benchmark'
type: array type: array
title: Data title: Data
description: List of benchmark objects.
type: object type: object
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig: BenchmarkConfig:
properties: properties:
eval_candidate: eval_candidate:
@ -7346,6 +7353,45 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
RegisterBenchmarkRequest:
properties:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter: AllowedToolsFilter:
properties: properties:
tool_names: tool_names:
@ -8395,6 +8441,33 @@ components:
- batch_id - batch_id
title: CancelBatchRequest title: CancelBatchRequest
type: object type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType: DialogType:
description: Parameter type for dialog data with semantic output labels. description: Parameter type for dialog data with semantic output labels.
properties: properties:

View file

@ -9166,20 +9166,23 @@ components:
type: string type: string
const: benchmark const: benchmark
title: Type title: Type
description: The resource type, always benchmark.
default: benchmark default: benchmark
dataset_id: dataset_id:
type: string type: string
title: Dataset Id title: Dataset Id
description: Identifier of the dataset to use for the benchmark evaluation.
scoring_functions: scoring_functions:
items: items:
type: string type: string
type: array type: array
title: Scoring Functions title: Scoring Functions
description: List of scoring function identifiers to apply during evaluation.
metadata: metadata:
additionalProperties: true additionalProperties: true
type: object type: object
title: Metadata title: Metadata
description: Metadata for this evaluation task description: Metadata for this evaluation task.
type: object type: object
required: required:
- identifier - identifier
@ -9195,10 +9198,12 @@ components:
$ref: '#/components/schemas/Benchmark' $ref: '#/components/schemas/Benchmark'
type: array type: array
title: Data title: Data
description: List of benchmark objects.
type: object type: object
required: required:
- data - data
title: ListBenchmarksResponse title: ListBenchmarksResponse
description: Response containing a list of benchmark objects.
BenchmarkConfig: BenchmarkConfig:
properties: properties:
eval_candidate: eval_candidate:
@ -9848,6 +9853,45 @@ components:
- $ref: '#/components/schemas/RowsDataSource' - $ref: '#/components/schemas/RowsDataSource'
title: RowsDataSource title: RowsDataSource
title: URIDataSource | RowsDataSource title: URIDataSource | RowsDataSource
RegisterBenchmarkRequest:
properties:
benchmark_id:
type: string
title: Benchmark Id
description: The ID of the benchmark to register.
dataset_id:
type: string
title: Dataset Id
description: The ID of the dataset to use for the benchmark.
scoring_functions:
items:
type: string
type: array
title: Scoring Functions
description: The scoring functions to use for the benchmark.
provider_benchmark_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider benchmark to use for the benchmark.
provider_id:
anyOf:
- type: string
- type: 'null'
description: The ID of the provider to use for the benchmark.
metadata:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
description: The metadata to use for the benchmark.
type: object
required:
- benchmark_id
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
description: Request model for registering a benchmark.
AllowedToolsFilter: AllowedToolsFilter:
properties: properties:
tool_names: tool_names:
@ -11053,6 +11097,33 @@ components:
- batch_id - batch_id
title: CancelBatchRequest title: CancelBatchRequest
type: object type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
DialogType: DialogType:
description: Parameter type for dialog data with semantic output labels. description: Parameter type for dialog data with semantic output labels.
properties: properties:

View file

@@ -3404,7 +3404,7 @@ paths:
  $ref: '#/components/responses/DefaultError'
  tags:
  - Benchmarks
- summary: List Benchmarks
+ summary: List all benchmarks.
  description: List all benchmarks.
  operationId: list_benchmarks_v1alpha_eval_benchmarks_get
  post:
@@ -3422,10 +3422,10 @@ paths:
  description: Default Response
  $ref: '#/components/responses/DefaultError'
  '204':
- description: Successful Response
+ description: The benchmark was successfully registered.
  tags:
  - Benchmarks
- summary: Register Benchmark
+ summary: Register a benchmark.
  description: Register a benchmark.
  operationId: register_benchmark_v1alpha_eval_benchmarks_post
  requestBody:
@@ -3445,20 +3445,20 @@ paths:
  schema:
  $ref: '#/components/schemas/Benchmark'
  '400':
- description: Bad Request
  $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
  '429':
- description: Too Many Requests
  $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
  '500':
- description: Internal Server Error
  $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
  default:
- description: Default Response
  $ref: '#/components/responses/DefaultError'
+ description: Default Response
  tags:
  - Benchmarks
- summary: Get Benchmark
+ summary: Get a benchmark by its ID.
  description: Get a benchmark by its ID.
  operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
  parameters:
@@ -3467,26 +3467,28 @@ paths:
  required: true
  schema:
  type: string
- description: 'Path parameter: benchmark_id'
+ description: The ID of the benchmark to get.
+ title: Benchmark Id
+ description: The ID of the benchmark to get.
  delete:
  responses:
  '400':
- description: Bad Request
  $ref: '#/components/responses/BadRequest400'
+ description: Bad Request
  '429':
- description: Too Many Requests
  $ref: '#/components/responses/TooManyRequests429'
+ description: Too Many Requests
  '500':
- description: Internal Server Error
  $ref: '#/components/responses/InternalServerError500'
+ description: Internal Server Error
  default:
- description: Default Response
  $ref: '#/components/responses/DefaultError'
+ description: Default Response
  '204':
- description: Successful Response
+ description: The benchmark was successfully unregistered.
  tags:
  - Benchmarks
- summary: Unregister Benchmark
+ summary: Unregister a benchmark.
  description: Unregister a benchmark.
  operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
  parameters:
@@ -3495,7 +3497,9 @@ paths:
  required: true
  schema:
  type: string
- description: 'Path parameter: benchmark_id'
+ description: The ID of the benchmark to unregister.
+ title: Benchmark Id
+ description: The ID of the benchmark to unregister.
  deprecated: true
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
  post:
@@ -10391,20 +10395,23 @@ components:
  type: string
  const: benchmark
  title: Type
+ description: The resource type, always benchmark.
  default: benchmark
  dataset_id:
  type: string
  title: Dataset Id
+ description: Identifier of the dataset to use for the benchmark evaluation.
  scoring_functions:
  items:
  type: string
  type: array
  title: Scoring Functions
+ description: List of scoring function identifiers to apply during evaluation.
  metadata:
  additionalProperties: true
  type: object
  title: Metadata
- description: Metadata for this evaluation task
+ description: Metadata for this evaluation task.
  type: object
  required:
  - identifier
@@ -10420,10 +10427,12 @@ components:
  $ref: '#/components/schemas/Benchmark'
  type: array
  title: Data
+ description: List of benchmark objects.
  type: object
  required:
  - data
  title: ListBenchmarksResponse
+ description: Response containing a list of benchmark objects.
  BenchmarkConfig:
  properties:
  eval_candidate:
@@ -11385,33 +11394,40 @@ components:
  benchmark_id:
  type: string
  title: Benchmark Id
+ description: The ID of the benchmark to register.
  dataset_id:
  type: string
  title: Dataset Id
+ description: The ID of the dataset to use for the benchmark.
  scoring_functions:
  items:
  type: string
  type: array
  title: Scoring Functions
+ description: The scoring functions to use for the benchmark.
  provider_benchmark_id:
  anyOf:
  - type: string
  - type: 'null'
+ description: The ID of the provider benchmark to use for the benchmark.
  provider_id:
  anyOf:
  - type: string
  - type: 'null'
+ description: The ID of the provider to use for the benchmark.
  metadata:
  anyOf:
  - additionalProperties: true
  type: object
  - type: 'null'
+ description: The metadata to use for the benchmark.
  type: object
  required:
  - benchmark_id
  - dataset_id
  - scoring_functions
  title: RegisterBenchmarkRequest
+ description: Request model for registering a benchmark.
  AllowedToolsFilter:
  properties:
  tool_names:
@@ -12617,6 +12633,33 @@ components:
  - batch_id
  title: CancelBatchRequest
  type: object
ListBenchmarksRequest:
description: Request model for listing benchmarks.
properties: {}
title: ListBenchmarksRequest
type: object
GetBenchmarkRequest:
description: Request model for getting a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to get.
title: Benchmark Id
type: string
required:
- benchmark_id
title: GetBenchmarkRequest
type: object
UnregisterBenchmarkRequest:
description: Request model for unregistering a benchmark.
properties:
benchmark_id:
description: The ID of the benchmark to unregister.
title: Benchmark Id
type: string
required:
- benchmark_id
title: UnregisterBenchmarkRequest
type: object
  DialogType:
  description: Parameter type for dialog data with semantic output labels.
  properties:

View file

@@ -9,6 +9,7 @@ from importlib.metadata import version
  from pydantic import BaseModel
  from llama_stack.core.datatypes import StackConfig
+ from llama_stack.core.distribution import builtin_automatically_routed_apis
  from llama_stack.core.external import load_external_apis
  from llama_stack.core.server.fastapi_router_registry import (
  _ROUTER_FACTORIES,
@@ -65,6 +66,17 @@ class DistributionInspectImpl(Inspect):
  def _get_provider_types(api: Api) -> list[str]:
  if api.value in ["providers", "inspect"]:
  return [] # These APIs don't have "real" providers they're internal to the stack
+ # For routing table APIs, look up providers from their router API
+ # (e.g., benchmarks -> eval, models -> inference, etc.)
+ auto_routed_apis = builtin_automatically_routed_apis()
+ for auto_routed in auto_routed_apis:
+ if auto_routed.routing_table_api == api:
+ # This is a routing table API, use its router API for providers
+ providers = config.providers.get(auto_routed.router_api.value, [])
+ return [p.provider_type for p in providers] if providers else []
+ # Regular API, look up providers directly
  providers = config.providers.get(api.value, [])
  return [p.provider_type for p in providers] if providers else []

View file

@@ -10,6 +10,7 @@ import json
  import logging # allow-direct-logging
  import os
  import sys
+ import typing
  from enum import Enum
  from io import BytesIO
  from pathlib import Path
@@ -490,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
  unwrapped_body_param = param
  break
# Check for parameters with Depends() annotation (FastAPI router endpoints)
# These need special handling: construct the request model from body
depends_param = None
for param in params_list:
param_type = param.annotation
if get_origin(param_type) is typing.Annotated:
args = get_args(param_type)
if len(args) > 1:
# Check if any metadata is Depends
metadata = args[1:]
for item in metadata:
# Check if it's a Depends object (has dependency attribute or is a callable)
# Depends objects typically have a 'dependency' attribute or are callable functions
if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
depends_param = param
break
if depends_param:
break
  # Convert parameters to Pydantic models where needed
  converted_body = {}
  for param_name, param in sig.parameters.items():
@@ -500,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
  else:
  converted_body[param_name] = convert_to_pydantic(param.annotation, value)
# Handle Depends parameter: construct request model from body
if depends_param and depends_param.name not in converted_body:
param_type = depends_param.annotation
if get_origin(param_type) is typing.Annotated:
base_type = get_args(param_type)[0]
# Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
# In Python 3.10+, Union types created with | syntax are still typing.Union
origin = get_origin(base_type)
if origin is Union:
# Get the first non-None type from the Union
union_args = get_args(base_type)
base_type = next(
(t for t in union_args if t is not type(None) and t is not None),
union_args[0] if union_args else None,
)
# Only try to instantiate if it's a class (not a Union or other non-callable type)
if base_type is not None and inspect.isclass(base_type) and callable(base_type):
# Construct the request model from all body parameters
converted_body[depends_param.name] = base_type(**body)
  # handle unwrapped body parameter after processing all named parameters
  if unwrapped_body_param:
  base_type = get_args(unwrapped_body_param.annotation)[0]

View file

@@ -4,13 +4,20 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.
- from typing import Any
  from llama_stack.core.datatypes import (
  BenchmarkWithOwner,
  )
  from llama_stack.log import get_logger
- from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
+ from llama_stack_api import (
+ Benchmark,
+ Benchmarks,
+ GetBenchmarkRequest,
+ ListBenchmarksRequest,
+ ListBenchmarksResponse,
+ RegisterBenchmarkRequest,
+ UnregisterBenchmarkRequest,
+ )
  from .common import CommonRoutingTableImpl
@@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables")
  class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
- async def list_benchmarks(self) -> ListBenchmarksResponse:
+ async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
  return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
- async def get_benchmark(self, benchmark_id: str) -> Benchmark:
+ async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
- benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
+ benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id)
  if benchmark is None:
- raise ValueError(f"Benchmark '{benchmark_id}' not found")
+ raise ValueError(f"Benchmark '{request.benchmark_id}' not found")
  return benchmark
  async def register_benchmark(
  self,
- benchmark_id: str,
+ request: RegisterBenchmarkRequest,
- dataset_id: str,
- scoring_functions: list[str],
- metadata: dict[str, Any] | None = None,
- provider_benchmark_id: str | None = None,
- provider_id: str | None = None,
  ) -> None:
- if metadata is None:
+ metadata = request.metadata if request.metadata is not None else {}
- metadata = {}
+ provider_id = request.provider_id
  if provider_id is None:
  if len(self.impls_by_provider_id) == 1:
  provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
  raise ValueError(
  "No provider specified and multiple providers available. Please specify a provider_id."
  )
+ provider_benchmark_id = request.provider_benchmark_id
  if provider_benchmark_id is None:
- provider_benchmark_id = benchmark_id
+ provider_benchmark_id = request.benchmark_id
  benchmark = BenchmarkWithOwner(
- identifier=benchmark_id,
+ identifier=request.benchmark_id,
- dataset_id=dataset_id,
+ dataset_id=request.dataset_id,
- scoring_functions=scoring_functions,
+ scoring_functions=request.scoring_functions,
  metadata=metadata,
  provider_id=provider_id,
  provider_resource_id=provider_benchmark_id,
  )
  await self.register_object(benchmark)
- async def unregister_benchmark(self, benchmark_id: str) -> None:
+ async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
- existing_benchmark = await self.get_benchmark(benchmark_id)
+ get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id)
+ existing_benchmark = await self.get_benchmark(get_request)
  await self.unregister_object(existing_benchmark)

View file

@@ -17,7 +17,7 @@ from fastapi import APIRouter
  from fastapi.routing import APIRoute
  from starlette.routing import Route
- from llama_stack_api import batches
+ from llama_stack_api import batches, benchmarks
  # Router factories for APIs that have FastAPI routers
  # Add new APIs here as they are migrated to the router system
@@ -25,6 +25,7 @@ from llama_stack_api.datatypes import Api
  _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
  "batches": batches.fastapi_routes.create_router,
+ "benchmarks": benchmarks.fastapi_routes.create_router,
  }

View file

@@ -13,6 +13,11 @@ from aiohttp import hdrs
  from starlette.routing import Route
  from llama_stack.core.resolver import api_protocol_map
from llama_stack.core.server.fastapi_router_registry import (
_ROUTER_FACTORIES,
build_fastapi_router,
get_router_routes,
)
  from llama_stack_api import Api, ExternalApiSpec, WebMethod
  EndpointFunc = Callable[..., Any]
@@ -85,7 +90,53 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
  return f"^{pattern}$"
# Process routes from FastAPI routers
for api_name in _ROUTER_FACTORIES.keys():
api = Api(api_name)
if api not in impls:
continue
impl = impls[api]
router = build_fastapi_router(api, impl)
if router:
router_routes = get_router_routes(router)
for route in router_routes:
# Get the endpoint function from the route
# For FastAPI routes, the endpoint is the actual function
func = route.endpoint
if func is None:
continue
# Get the first (and typically only) method from the set, filtering out HEAD
available_methods = [m for m in (route.methods or []) if m != "HEAD"]
if not available_methods:
continue # Skip if only HEAD method is available
method = available_methods[0].lower()
if method not in route_impls:
route_impls[method] = {}
# Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
# We don't have webmethod metadata for router routes, so create a minimal one
# that has the attributes used by the library client (descriptive_name for tracing)
#
# TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
# - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
# - Pass summary directly in RouteMatch instead of WebMethod
# - Remove this WebMethod() instantiation entirely
# - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
webmethod = WebMethod(descriptive_name=None)
route_impls[method][_convert_path_to_regex(route.path)] = (
func,
route.path,
webmethod,
)
# Process routes from legacy webmethod-based APIs
  for api, api_routes in api_to_routes.items():
+ # Skip APIs that have routers (already processed above)
+ if api.value in _ROUTER_FACTORIES:
+ continue
  if api not in impls:
  continue
  for route, webmethod in api_routes:

View file

@@ -6,12 +6,14 @@
  import asyncio
  import importlib.resources
+ import inspect
  import os
  import re
  import tempfile
- from typing import Any
+ from typing import Any, get_type_hints
  import yaml
+ from pydantic import BaseModel
  from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
  from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig
@@ -108,6 +110,81 @@ REGISTRY_REFRESH_TASK = None
  TEST_RECORDING_CONTEXT = None
def is_request_model(t: Any) -> bool:
"""Check if a type is a request model (Pydantic BaseModel).
Args:
t: The type to check
Returns:
True if the type is a Pydantic BaseModel subclass, False otherwise
"""
return inspect.isclass(t) and issubclass(t, BaseModel)
async def invoke_with_optional_request(method: Any) -> Any:
"""Invoke a method, automatically creating a request instance if needed.
For APIs that use request models, this will create an empty request object.
For backward compatibility, falls back to calling without arguments.
Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
Handles methods with:
- No parameters: calls without arguments
- One or more request model parameters: creates empty instances for each
- Mixed parameters: creates request models, uses defaults for others
- Required non-request-model parameters without defaults: falls back to calling without arguments
Args:
method: The method to invoke
Returns:
The result of calling the method
"""
try:
hints = get_type_hints(method)
except Exception:
# Forward references can't be resolved, fall back to calling without request
return await method()
params = list(inspect.signature(method).parameters.values())
params = [p for p in params if p.name != "self"]
if not params:
return await method()
# Build arguments for the method call
args: dict[str, Any] = {}
can_call = True
for param in params:
param_type = hints.get(param.name)
# If it's a request model, try to create an empty instance
if param_type and is_request_model(param_type):
try:
args[param.name] = param_type()
except Exception:
# Request model requires arguments, can't create empty instance
can_call = False
break
# If it has a default value, we can skip it (will use default)
elif param.default != inspect.Parameter.empty:
continue
# Required parameter that's not a request model - can't provide it
else:
can_call = False
break
if can_call and args:
return await method(**args)
# Fall back to calling without arguments for backward compatibility
return await method()
  async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
  for rsrc, api, register_method, list_method in RESOURCES:
  objects = getattr(run_config.registered_resources, rsrc)
@@ -129,7 +206,7 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
  await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
  method = getattr(impls[api], list_method)
- response = await method()
+ response = await invoke_with_optional_request(method)
  objects_to_process = response.data if hasattr(response, "data") else response

View file

@@ -40,7 +40,11 @@ from .benchmarks import (
  BenchmarkInput,
  Benchmarks,
  CommonBenchmarkFields,
+ GetBenchmarkRequest,
+ ListBenchmarksRequest,
  ListBenchmarksResponse,
+ RegisterBenchmarkRequest,
+ UnregisterBenchmarkRequest,
  )
  # Import commonly used types from common submodule
@@ -567,7 +571,11 @@ __all__ = [
  "LLMRAGQueryGeneratorConfig",
  "ListBatchesResponse",
  "RetrieveBatchRequest",
+ "GetBenchmarkRequest",
+ "ListBenchmarksRequest",
  "ListBenchmarksResponse",
+ "RegisterBenchmarkRequest",
+ "UnregisterBenchmarkRequest",
  "ListDatasetsResponse",
  "ListModelsResponse",
  "ListOpenAIChatCompletionResponse",

View file

@ -1,105 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type, webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
class CommonBenchmarkFields(BaseModel):
dataset_id: str
scoring_functions: list[str]
metadata: dict[str, Any] = Field(
default_factory=dict,
description="Metadata for this evaluation task",
)
@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
"""A benchmark resource for evaluating model performance.
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
:param scoring_functions: List of scoring function identifiers to apply during evaluation
:param metadata: Metadata for this evaluation task
:param type: The resource type, always benchmark
"""
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
@property
def benchmark_id(self) -> str:
return self.identifier
@property
def provider_benchmark_id(self) -> str | None:
return self.provider_resource_id
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
benchmark_id: str
provider_id: str | None = None
provider_benchmark_id: str | None = None
@json_schema_type
class ListBenchmarksResponse(BaseModel):
data: list[Benchmark]
@runtime_checkable
class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks.
:returns: A ListBenchmarksResponse.
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark(
self,
benchmark_id: str,
) -> Benchmark:
"""Get a benchmark by its ID.
:param benchmark_id: The ID of the benchmark to get.
:returns: A Benchmark.
"""
...
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
async def register_benchmark(
self,
benchmark_id: str,
dataset_id: str,
scoring_functions: list[str],
provider_benchmark_id: str | None = None,
provider_id: str | None = None,
metadata: dict[str, Any] | None = None,
) -> None:
"""Register a benchmark.
:param benchmark_id: The ID of the benchmark to register.
:param dataset_id: The ID of the dataset to use for the benchmark.
:param scoring_functions: The scoring functions to use for the benchmark.
:param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
:param provider_id: The ID of the provider to use for the benchmark.
:param metadata: The metadata to use for the benchmark.
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark.
:param benchmark_id: The ID of the benchmark to unregister.
"""
...

View file

@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Benchmarks API protocol and models.
This module contains the Benchmarks protocol definition.
Pydantic models are defined in llama_stack_api.benchmarks.models.
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
"""
# Import fastapi_routes for router factory access
from . import fastapi_routes
# Import protocol for re-export
from .api import Benchmarks
# Import models for re-export
from .models import (
Benchmark,
BenchmarkInput,
CommonBenchmarkFields,
GetBenchmarkRequest,
ListBenchmarksRequest,
ListBenchmarksResponse,
RegisterBenchmarkRequest,
UnregisterBenchmarkRequest,
)
__all__ = [
"Benchmarks",
"Benchmark",
"BenchmarkInput",
"CommonBenchmarkFields",
"ListBenchmarksResponse",
"ListBenchmarksRequest",
"GetBenchmarkRequest",
"RegisterBenchmarkRequest",
"UnregisterBenchmarkRequest",
"fastapi_routes",
]

View file

@ -0,0 +1,39 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol, runtime_checkable
from .models import (
Benchmark,
GetBenchmarkRequest,
ListBenchmarksRequest,
ListBenchmarksResponse,
RegisterBenchmarkRequest,
UnregisterBenchmarkRequest,
)
@runtime_checkable
class Benchmarks(Protocol):
async def list_benchmarks(
self,
request: ListBenchmarksRequest,
) -> ListBenchmarksResponse: ...
async def get_benchmark(
self,
request: GetBenchmarkRequest,
) -> Benchmark: ...
async def register_benchmark(
self,
request: RegisterBenchmarkRequest,
) -> None: ...
async def unregister_benchmark(
self,
request: UnregisterBenchmarkRequest,
) -> None: ...

View file

@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""FastAPI router for the Benchmarks API.
This module defines the FastAPI router for the Benchmarks API using standard
FastAPI route decorators. The router is defined in the API package to keep
all API-related code together.
"""
from typing import Annotated
from fastapi import APIRouter, Body, Depends
from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
from .api import Benchmarks
from .models import (
Benchmark,
GetBenchmarkRequest,
ListBenchmarksRequest,
ListBenchmarksResponse,
RegisterBenchmarkRequest,
UnregisterBenchmarkRequest,
)
# Automatically generate dependency functions from Pydantic models
# This ensures the models are the single source of truth for descriptions
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)
def create_router(impl: Benchmarks) -> APIRouter:
"""Create a FastAPI router for the Benchmarks API.
Args:
impl: The Benchmarks implementation instance
Returns:
APIRouter configured for the Benchmarks API
"""
router = APIRouter(
prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
tags=["Benchmarks"],
responses=standard_responses,
)
@router.get(
"/eval/benchmarks",
response_model=ListBenchmarksResponse,
summary="List all benchmarks.",
description="List all benchmarks.",
responses={
200: {"description": "A ListBenchmarksResponse."},
},
)
async def list_benchmarks(
request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
) -> ListBenchmarksResponse:
return await impl.list_benchmarks(request)
@router.get(
"/eval/benchmarks/{benchmark_id}",
response_model=Benchmark,
summary="Get a benchmark by its ID.",
description="Get a benchmark by its ID.",
responses={
200: {"description": "A Benchmark."},
},
)
async def get_benchmark(
request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
) -> Benchmark:
return await impl.get_benchmark(request)
@router.post(
"/eval/benchmarks",
summary="Register a benchmark.",
description="Register a benchmark.",
responses={
200: {"description": "The benchmark was successfully registered."},
},
deprecated=True,
)
async def register_benchmark(
request: Annotated[RegisterBenchmarkRequest, Body(...)],
) -> None:
return await impl.register_benchmark(request)
@router.delete(
"/eval/benchmarks/{benchmark_id}",
summary="Unregister a benchmark.",
description="Unregister a benchmark.",
responses={
200: {"description": "The benchmark was successfully unregistered."},
},
deprecated=True,
)
async def unregister_benchmark(
request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
) -> None:
return await impl.unregister_benchmark(request)
return router

View file

@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Pydantic models for Benchmarks API requests and responses.
This module defines the request and response models for the Benchmarks API
using Pydantic with Field descriptions for OpenAPI schema generation.
"""
from typing import Any, Literal
from pydantic import BaseModel, Field
from llama_stack_api.resource import Resource, ResourceType
from llama_stack_api.schema_utils import json_schema_type
@json_schema_type
class ListBenchmarksRequest(BaseModel):
"""Request model for listing benchmarks."""
pass
@json_schema_type
class GetBenchmarkRequest(BaseModel):
"""Request model for getting a benchmark."""
benchmark_id: str = Field(..., description="The ID of the benchmark to get.")
@json_schema_type
class RegisterBenchmarkRequest(BaseModel):
"""Request model for registering a benchmark."""
benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
provider_benchmark_id: str | None = Field(
default=None, description="The ID of the provider benchmark to use for the benchmark."
)
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")
@json_schema_type
class UnregisterBenchmarkRequest(BaseModel):
"""Request model for unregistering a benchmark."""
benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")
class CommonBenchmarkFields(BaseModel):
dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
scoring_functions: list[str] = Field(
..., description="List of scoring function identifiers to apply during evaluation."
)
metadata: dict[str, Any] = Field(
default_factory=dict,
description="Metadata for this evaluation task.",
)
@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
"""A benchmark resource for evaluating model performance."""
type: Literal[ResourceType.benchmark] = Field(
default=ResourceType.benchmark,
description="The resource type, always benchmark.",
)
@property
def benchmark_id(self) -> str:
return self.identifier
@property
def provider_benchmark_id(self) -> str | None:
return self.provider_resource_id
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
benchmark_id: str = Field(..., description="The ID of the benchmark.")
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
provider_benchmark_id: str | None = Field(
default=None, description="The ID of the provider benchmark to use for the benchmark."
)
@json_schema_type
class ListBenchmarksResponse(BaseModel):
"""Response containing a list of benchmark objects."""
data: list[Benchmark] = Field(..., description="List of benchmark objects.")
__all__ = [
"ListBenchmarksRequest",
"GetBenchmarkRequest",
"RegisterBenchmarkRequest",
"UnregisterBenchmarkRequest",
"CommonBenchmarkFields",
"Benchmark",
"BenchmarkInput",
"ListBenchmarksResponse",
]

View file

@ -22,14 +22,17 @@ from llama_stack_api import (
Api, Api,
Dataset, Dataset,
DatasetPurpose, DatasetPurpose,
ListBenchmarksRequest,
ListToolDefsResponse, ListToolDefsResponse,
Model, Model,
ModelNotFoundError, ModelNotFoundError,
ModelType, ModelType,
NumberType, NumberType,
RegisterBenchmarkRequest,
Shield, Shield,
ToolDef, ToolDef,
ToolGroup, ToolGroup,
UnregisterBenchmarkRequest,
URIDataSource, URIDataSource,
) )
@ -420,24 +423,26 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
# Register multiple benchmarks and verify listing # Register multiple benchmarks and verify listing
await table.register_benchmark( await table.register_benchmark(
benchmark_id="test-benchmark", RegisterBenchmarkRequest(
dataset_id="test-dataset", benchmark_id="test-benchmark",
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"], dataset_id="test-dataset",
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
)
) )
benchmarks = await table.list_benchmarks() benchmarks = await table.list_benchmarks(ListBenchmarksRequest())
assert len(benchmarks.data) == 1 assert len(benchmarks.data) == 1
benchmark_ids = {b.identifier for b in benchmarks.data} benchmark_ids = {b.identifier for b in benchmarks.data}
assert "test-benchmark" in benchmark_ids assert "test-benchmark" in benchmark_ids
# Unregister the benchmark and verify removal # Unregister the benchmark and verify removal
await table.unregister_benchmark(benchmark_id="test-benchmark") await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="test-benchmark"))
benchmarks_after = await table.list_benchmarks() benchmarks_after = await table.list_benchmarks(ListBenchmarksRequest())
assert len(benchmarks_after.data) == 0 assert len(benchmarks_after.data) == 0
# Unregistering a non-existent benchmark should raise a clear error # Unregistering a non-existent benchmark should raise a clear error
with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"): with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
await table.unregister_benchmark(benchmark_id="dummy_benchmark") await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="dummy_benchmark"))
async def test_tool_groups_routing_table(cached_disk_dist_registry): async def test_tool_groups_routing_table(cached_disk_dist_registry):