Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-16 20:42:38 +00:00)
feat: convert Benchmarks API to use FastAPI router (#4309)
# What does this PR do?

Convert the Benchmarks API from `@webmethod` decorators to the FastAPI router pattern, matching the Batches API structure. One notable change is the update of `stack.py` to handle request models in `register_resources()`.

Closes: #4308

## Test Plan

CI and:

`curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("benchmark"))'`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
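The pattern, in brief. This is a condensed sketch assembled from the new files in this diff (`models.py`, `api.py`, `fastapi_routes.py`), not the full implementation; the real router also wires path/query dependencies and the shared `standard_responses`:

```python
# Condensed sketch of the router pattern this PR applies to the Benchmarks API.
from typing import Annotated, Protocol

from fastapi import APIRouter, Body
from pydantic import BaseModel, Field


class RegisterBenchmarkRequest(BaseModel):
    benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
    dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
    scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")


class Benchmarks(Protocol):
    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None: ...


def create_router(impl: Benchmarks) -> APIRouter:
    # The factory closes over the implementation, so routes stay thin wrappers.
    router = APIRouter(prefix="/v1alpha", tags=["Benchmarks"])

    @router.post("/eval/benchmarks", summary="Register a benchmark.", deprecated=True)
    async def register_benchmark(request: Annotated[RegisterBenchmarkRequest, Body(...)]) -> None:
        return await impl.register_benchmark(request)

    return router
```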
Parent: 661985e240
Commit: ff375f1abb
18 changed files with 862 additions and 195 deletions
@ -3404,7 +3404,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -3422,10 +3422,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -3445,20 +3445,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -3467,26 +3467,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -3495,7 +3497,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
|
|
@ -10391,20 +10395,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -10420,10 +10427,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -11385,33 +11394,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -12617,6 +12633,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/deprecated-llama-stack-spec.yaml (vendored): 77 lines changed
|
|
@ -794,7 +794,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -812,10 +812,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -835,20 +835,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -857,26 +857,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -885,7 +887,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
components:
|
||||
schemas:
|
||||
|
|
@ -7375,20 +7379,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -7404,10 +7411,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -8369,33 +8378,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -9601,6 +9617,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/experimental-llama-stack-spec.yaml (vendored): 89 lines changed
|
|
@ -188,7 +188,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}:
|
||||
|
|
@ -201,20 +201,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -223,7 +223,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -6517,20 +6519,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -6546,10 +6551,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -7346,6 +7353,45 @@ components:
|
|||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
title: RowsDataSource
|
||||
title: URIDataSource | RowsDataSource
|
||||
RegisterBenchmarkRequest:
|
||||
properties:
|
||||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -8395,6 +8441,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/llama-stack-spec.yaml (vendored): 73 lines changed
|
|
@ -9166,20 +9166,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -9195,10 +9198,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -9848,6 +9853,45 @@ components:
|
|||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
title: RowsDataSource
|
||||
title: URIDataSource | RowsDataSource
|
||||
RegisterBenchmarkRequest:
|
||||
properties:
|
||||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -11053,6 +11097,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
docs/static/stainless-llama-stack-spec.yaml (vendored): 77 lines changed
|
|
@ -3404,7 +3404,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: List Benchmarks
|
||||
summary: List all benchmarks.
|
||||
description: List all benchmarks.
|
||||
operationId: list_benchmarks_v1alpha_eval_benchmarks_get
|
||||
post:
|
||||
|
|
@ -3422,10 +3422,10 @@ paths:
|
|||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully registered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Register Benchmark
|
||||
summary: Register a benchmark.
|
||||
description: Register a benchmark.
|
||||
operationId: register_benchmark_v1alpha_eval_benchmarks_post
|
||||
requestBody:
|
||||
|
|
@ -3445,20 +3445,20 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Get Benchmark
|
||||
summary: Get a benchmark by its ID.
|
||||
description: Get a benchmark by its ID.
|
||||
operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
|
||||
parameters:
|
||||
|
|
@ -3467,26 +3467,28 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to get.
|
||||
delete:
|
||||
responses:
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
description: Bad Request
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
description: Too Many Requests
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
description: Internal Server Error
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
description: Default Response
|
||||
'204':
|
||||
description: Successful Response
|
||||
description: The benchmark was successfully unregistered.
|
||||
tags:
|
||||
- Benchmarks
|
||||
summary: Unregister Benchmark
|
||||
summary: Unregister a benchmark.
|
||||
description: Unregister a benchmark.
|
||||
operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete
|
||||
parameters:
|
||||
|
|
@ -3495,7 +3497,9 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
description: 'Path parameter: benchmark_id'
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to unregister.
|
||||
deprecated: true
|
||||
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
|
||||
post:
|
||||
|
|
@ -10391,20 +10395,23 @@ components:
|
|||
type: string
|
||||
const: benchmark
|
||||
title: Type
|
||||
description: The resource type, always benchmark.
|
||||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: Identifier of the dataset to use for the benchmark evaluation.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: List of scoring function identifiers to apply during evaluation.
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
description: Metadata for this evaluation task
|
||||
description: Metadata for this evaluation task.
|
||||
type: object
|
||||
required:
|
||||
- identifier
|
||||
|
|
@ -10420,10 +10427,12 @@ components:
|
|||
$ref: '#/components/schemas/Benchmark'
|
||||
type: array
|
||||
title: Data
|
||||
description: List of benchmark objects.
|
||||
type: object
|
||||
required:
|
||||
- data
|
||||
title: ListBenchmarksResponse
|
||||
description: Response containing a list of benchmark objects.
|
||||
BenchmarkConfig:
|
||||
properties:
|
||||
eval_candidate:
|
||||
|
|
@ -11385,33 +11394,40 @@ components:
|
|||
benchmark_id:
|
||||
type: string
|
||||
title: Benchmark Id
|
||||
description: The ID of the benchmark to register.
|
||||
dataset_id:
|
||||
type: string
|
||||
title: Dataset Id
|
||||
description: The ID of the dataset to use for the benchmark.
|
||||
scoring_functions:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
title: Scoring Functions
|
||||
description: The scoring functions to use for the benchmark.
|
||||
provider_benchmark_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider benchmark to use for the benchmark.
|
||||
provider_id:
|
||||
anyOf:
|
||||
- type: string
|
||||
- type: 'null'
|
||||
description: The ID of the provider to use for the benchmark.
|
||||
metadata:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
description: The metadata to use for the benchmark.
|
||||
type: object
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
description: Request model for registering a benchmark.
|
||||
AllowedToolsFilter:
|
||||
properties:
|
||||
tool_names:
|
||||
|
|
@ -12617,6 +12633,33 @@ components:
|
|||
- batch_id
|
||||
title: CancelBatchRequest
|
||||
type: object
|
||||
ListBenchmarksRequest:
|
||||
description: Request model for listing benchmarks.
|
||||
properties: {}
|
||||
title: ListBenchmarksRequest
|
||||
type: object
|
||||
GetBenchmarkRequest:
|
||||
description: Request model for getting a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to get.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: GetBenchmarkRequest
|
||||
type: object
|
||||
UnregisterBenchmarkRequest:
|
||||
description: Request model for unregistering a benchmark.
|
||||
properties:
|
||||
benchmark_id:
|
||||
description: The ID of the benchmark to unregister.
|
||||
title: Benchmark Id
|
||||
type: string
|
||||
required:
|
||||
- benchmark_id
|
||||
title: UnregisterBenchmarkRequest
|
||||
type: object
|
||||
DialogType:
|
||||
description: Parameter type for dialog data with semantic output labels.
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from importlib.metadata import version
|
|||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.datatypes import StackConfig
|
||||
from llama_stack.core.distribution import builtin_automatically_routed_apis
|
||||
from llama_stack.core.external import load_external_apis
|
||||
from llama_stack.core.server.fastapi_router_registry import (
|
||||
_ROUTER_FACTORIES,
|
||||
|
|
@ -65,6 +66,17 @@ class DistributionInspectImpl(Inspect):
|
|||
def _get_provider_types(api: Api) -> list[str]:
|
||||
if api.value in ["providers", "inspect"]:
|
||||
return [] # These APIs don't have "real" providers they're internal to the stack
|
||||
|
||||
# For routing table APIs, look up providers from their router API
|
||||
# (e.g., benchmarks -> eval, models -> inference, etc.)
|
||||
auto_routed_apis = builtin_automatically_routed_apis()
|
||||
for auto_routed in auto_routed_apis:
|
||||
if auto_routed.routing_table_api == api:
|
||||
# This is a routing table API, use its router API for providers
|
||||
providers = config.providers.get(auto_routed.router_api.value, [])
|
||||
return [p.provider_type for p in providers] if providers else []
|
||||
|
||||
# Regular API, look up providers directly
|
||||
providers = config.providers.get(api.value, [])
|
||||
return [p.provider_type for p in providers] if providers else []
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import json
|
|||
import logging # allow-direct-logging
|
||||
import os
|
||||
import sys
|
||||
import typing
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
|
@ -490,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
unwrapped_body_param = param
|
||||
break
|
||||
|
||||
# Check for parameters with Depends() annotation (FastAPI router endpoints)
|
||||
# These need special handling: construct the request model from body
|
||||
depends_param = None
|
||||
for param in params_list:
|
||||
param_type = param.annotation
|
||||
if get_origin(param_type) is typing.Annotated:
|
||||
args = get_args(param_type)
|
||||
if len(args) > 1:
|
||||
# Check if any metadata is Depends
|
||||
metadata = args[1:]
|
||||
for item in metadata:
|
||||
# Check if it's a Depends object (has dependency attribute or is a callable)
|
||||
# Depends objects typically have a 'dependency' attribute or are callable functions
|
||||
if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)):
|
||||
depends_param = param
|
||||
break
|
||||
if depends_param:
|
||||
break
|
||||
|
||||
# Convert parameters to Pydantic models where needed
|
||||
converted_body = {}
|
||||
for param_name, param in sig.parameters.items():
|
||||
|
|
@ -500,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
|||
else:
|
||||
converted_body[param_name] = convert_to_pydantic(param.annotation, value)
|
||||
|
||||
# Handle Depends parameter: construct request model from body
|
||||
if depends_param and depends_param.name not in converted_body:
|
||||
param_type = depends_param.annotation
|
||||
if get_origin(param_type) is typing.Annotated:
|
||||
base_type = get_args(param_type)[0]
|
||||
# Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type
|
||||
# In Python 3.10+, Union types created with | syntax are still typing.Union
|
||||
origin = get_origin(base_type)
|
||||
if origin is Union:
|
||||
# Get the first non-None type from the Union
|
||||
union_args = get_args(base_type)
|
||||
base_type = next(
|
||||
(t for t in union_args if t is not type(None) and t is not None),
|
||||
union_args[0] if union_args else None,
|
||||
)
|
||||
|
||||
# Only try to instantiate if it's a class (not a Union or other non-callable type)
|
||||
if base_type is not None and inspect.isclass(base_type) and callable(base_type):
|
||||
# Construct the request model from all body parameters
|
||||
converted_body[depends_param.name] = base_type(**body)
|
||||
|
||||
# handle unwrapped body parameter after processing all named parameters
|
||||
if unwrapped_body_param:
|
||||
base_type = get_args(unwrapped_body_param.annotation)[0]
|
||||
|
|
|
|||
|
|
@ -4,13 +4,20 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
|
||||
from llama_stack.core.datatypes import (
|
||||
BenchmarkWithOwner,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
|
||||
from llama_stack_api import (
|
||||
Benchmark,
|
||||
Benchmarks,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
from .common import CommonRoutingTableImpl
|
||||
|
||||
|
|
@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables")
|
|||
|
||||
|
||||
class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
|
||||
return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark"))
|
||||
|
||||
async def get_benchmark(self, benchmark_id: str) -> Benchmark:
|
||||
benchmark = await self.get_object_by_identifier("benchmark", benchmark_id)
|
||||
async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
|
||||
benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id)
|
||||
if benchmark is None:
|
||||
raise ValueError(f"Benchmark '{benchmark_id}' not found")
|
||||
raise ValueError(f"Benchmark '{request.benchmark_id}' not found")
|
||||
return benchmark
|
||||
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
metadata: dict[str, Any] | None = None,
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
request: RegisterBenchmarkRequest,
|
||||
) -> None:
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
metadata = request.metadata if request.metadata is not None else {}
|
||||
provider_id = request.provider_id
|
||||
if provider_id is None:
|
||||
if len(self.impls_by_provider_id) == 1:
|
||||
provider_id = list(self.impls_by_provider_id.keys())[0]
|
||||
|
|
@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
|
|||
raise ValueError(
|
||||
"No provider specified and multiple providers available. Please specify a provider_id."
|
||||
)
|
||||
provider_benchmark_id = request.provider_benchmark_id
|
||||
if provider_benchmark_id is None:
|
||||
provider_benchmark_id = benchmark_id
|
||||
provider_benchmark_id = request.benchmark_id
|
||||
benchmark = BenchmarkWithOwner(
|
||||
identifier=benchmark_id,
|
||||
dataset_id=dataset_id,
|
||||
scoring_functions=scoring_functions,
|
||||
identifier=request.benchmark_id,
|
||||
dataset_id=request.dataset_id,
|
||||
scoring_functions=request.scoring_functions,
|
||||
metadata=metadata,
|
||||
provider_id=provider_id,
|
||||
provider_resource_id=provider_benchmark_id,
|
||||
)
|
||||
await self.register_object(benchmark)
|
||||
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
existing_benchmark = await self.get_benchmark(benchmark_id)
|
||||
async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
|
||||
get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id)
|
||||
existing_benchmark = await self.get_benchmark(get_request)
|
||||
await self.unregister_object(existing_benchmark)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ from fastapi import APIRouter
|
|||
from fastapi.routing import APIRoute
|
||||
from starlette.routing import Route
|
||||
|
||||
from llama_stack_api import batches
|
||||
from llama_stack_api import batches, benchmarks
|
||||
|
||||
# Router factories for APIs that have FastAPI routers
|
||||
# Add new APIs here as they are migrated to the router system
|
||||
|
|
@ -25,6 +25,7 @@ from llama_stack_api.datatypes import Api
|
|||
|
||||
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
|
||||
"batches": batches.fastapi_routes.create_router,
|
||||
"benchmarks": benchmarks.fastapi_routes.create_router,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@ from aiohttp import hdrs
|
|||
from starlette.routing import Route
|
||||
|
||||
from llama_stack.core.resolver import api_protocol_map
|
||||
from llama_stack.core.server.fastapi_router_registry import (
|
||||
_ROUTER_FACTORIES,
|
||||
build_fastapi_router,
|
||||
get_router_routes,
|
||||
)
|
||||
from llama_stack_api import Api, ExternalApiSpec, WebMethod
|
||||
|
||||
EndpointFunc = Callable[..., Any]
|
||||
|
|
@ -85,7 +90,53 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
|
|||
|
||||
return f"^{pattern}$"
|
||||
|
||||
# Process routes from FastAPI routers
|
||||
for api_name in _ROUTER_FACTORIES.keys():
|
||||
api = Api(api_name)
|
||||
if api not in impls:
|
||||
continue
|
||||
impl = impls[api]
|
||||
router = build_fastapi_router(api, impl)
|
||||
if router:
|
||||
router_routes = get_router_routes(router)
|
||||
for route in router_routes:
|
||||
# Get the endpoint function from the route
|
||||
# For FastAPI routes, the endpoint is the actual function
|
||||
func = route.endpoint
|
||||
if func is None:
|
||||
continue
|
||||
|
||||
# Get the first (and typically only) method from the set, filtering out HEAD
|
||||
available_methods = [m for m in (route.methods or []) if m != "HEAD"]
|
||||
if not available_methods:
|
||||
continue # Skip if only HEAD method is available
|
||||
method = available_methods[0].lower()
|
||||
|
||||
if method not in route_impls:
|
||||
route_impls[method] = {}
|
||||
|
||||
# Create a minimal WebMethod for router routes (needed for RouteMatch tuple)
|
||||
# We don't have webmethod metadata for router routes, so create a minimal one
|
||||
# that has the attributes used by the library client (descriptive_name for tracing)
|
||||
#
|
||||
# TODO: Long-term migration plan (once all APIs are migrated to FastAPI routers):
|
||||
# - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects)
|
||||
# - Pass summary directly in RouteMatch instead of WebMethod
|
||||
# - Remove this WebMethod() instantiation entirely
|
||||
# - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name
|
||||
webmethod = WebMethod(descriptive_name=None)
|
||||
route_impls[method][_convert_path_to_regex(route.path)] = (
|
||||
func,
|
||||
route.path,
|
||||
webmethod,
|
||||
)
|
||||
|
||||
# Process routes from legacy webmethod-based APIs
|
||||
for api, api_routes in api_to_routes.items():
|
||||
# Skip APIs that have routers (already processed above)
|
||||
if api.value in _ROUTER_FACTORIES:
|
||||
continue
|
||||
|
||||
if api not in impls:
|
||||
continue
|
||||
for route, webmethod in api_routes:
|
||||
|
|
|
|||
|
|
@ -6,12 +6,14 @@
|
|||
|
||||
import asyncio
|
||||
import importlib.resources
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from typing import Any
|
||||
from typing import Any, get_type_hints
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
|
||||
from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig
|
||||
|
|
@ -108,6 +110,81 @@ REGISTRY_REFRESH_TASK = None
|
|||
TEST_RECORDING_CONTEXT = None
|
||||
|
||||
|
||||
def is_request_model(t: Any) -> bool:
|
||||
"""Check if a type is a request model (Pydantic BaseModel).
|
||||
|
||||
Args:
|
||||
t: The type to check
|
||||
|
||||
Returns:
|
||||
True if the type is a Pydantic BaseModel subclass, False otherwise
|
||||
"""
|
||||
|
||||
return inspect.isclass(t) and issubclass(t, BaseModel)
|
||||
|
||||
|
||||
async def invoke_with_optional_request(method: Any) -> Any:
|
||||
"""Invoke a method, automatically creating a request instance if needed.
|
||||
|
||||
For APIs that use request models, this will create an empty request object.
|
||||
For backward compatibility, falls back to calling without arguments.
|
||||
|
||||
Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class).
|
||||
|
||||
Handles methods with:
|
||||
- No parameters: calls without arguments
|
||||
- One or more request model parameters: creates empty instances for each
|
||||
- Mixed parameters: creates request models, uses defaults for others
|
||||
- Required non-request-model parameters without defaults: falls back to calling without arguments
|
||||
|
||||
Args:
|
||||
method: The method to invoke
|
||||
|
||||
Returns:
|
||||
The result of calling the method
|
||||
"""
|
||||
try:
|
||||
hints = get_type_hints(method)
|
||||
except Exception:
|
||||
# Forward references can't be resolved, fall back to calling without request
|
||||
return await method()
|
||||
|
||||
params = list(inspect.signature(method).parameters.values())
|
||||
params = [p for p in params if p.name != "self"]
|
||||
|
||||
if not params:
|
||||
return await method()
|
||||
|
||||
# Build arguments for the method call
|
||||
args: dict[str, Any] = {}
|
||||
can_call = True
|
||||
|
||||
for param in params:
|
||||
param_type = hints.get(param.name)
|
||||
|
||||
# If it's a request model, try to create an empty instance
|
||||
if param_type and is_request_model(param_type):
|
||||
try:
|
||||
args[param.name] = param_type()
|
||||
except Exception:
|
||||
# Request model requires arguments, can't create empty instance
|
||||
can_call = False
|
||||
break
|
||||
# If it has a default value, we can skip it (will use default)
|
||||
elif param.default != inspect.Parameter.empty:
|
||||
continue
|
||||
# Required parameter that's not a request model - can't provide it
|
||||
else:
|
||||
can_call = False
|
||||
break
|
||||
|
||||
if can_call and args:
|
||||
return await method(**args)
|
||||
|
||||
# Fall back to calling without arguments for backward compatibility
|
||||
return await method()
|
||||
|
||||
|
||||
async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
||||
for rsrc, api, register_method, list_method in RESOURCES:
|
||||
objects = getattr(run_config.registered_resources, rsrc)
|
||||
|
|
@ -129,7 +206,7 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
|
|||
await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()})
|
||||
|
||||
method = getattr(impls[api], list_method)
|
||||
response = await method()
|
||||
response = await invoke_with_optional_request(method)
|
||||
|
||||
objects_to_process = response.data if hasattr(response, "data") else response
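For clarity, a condensed, runnable restatement of the dispatch idea in the helper above; the names are illustrative and the real `invoke_with_optional_request()` additionally handles defaults, mixed parameters, and fallback cases:

```python
# Sketch: build an empty request model for new-style list methods,
# fall back to a bare call for legacy no-argument methods.
import asyncio
import inspect
from typing import get_type_hints

from pydantic import BaseModel


class ListBenchmarksRequest(BaseModel):
    """Empty request model, mirroring the one added by this PR."""


async def call_list_method(method):
    hints = get_type_hints(method)
    params = [p for p in inspect.signature(method).parameters.values() if p.name != "self"]
    if not params:
        return await method()  # legacy style: no parameters
    kwargs = {}
    for p in params:
        t = hints.get(p.name)
        if inspect.isclass(t) and issubclass(t, BaseModel):
            kwargs[p.name] = t()  # new style: instantiate the request model
    return await method(**kwargs)


async def new_style(request: ListBenchmarksRequest) -> list[str]:
    return ["bench-1"]


async def legacy_style() -> list[str]:
    return ["model-1"]


print(asyncio.run(call_list_method(new_style)))    # ['bench-1']
print(asyncio.run(call_list_method(legacy_style)))  # ['model-1']
```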
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,11 @@ from .benchmarks import (
|
|||
BenchmarkInput,
|
||||
Benchmarks,
|
||||
CommonBenchmarkFields,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
# Import commonly used types from common submodule
|
||||
|
|
@ -567,7 +571,11 @@ __all__ = [
|
|||
"LLMRAGQueryGeneratorConfig",
|
||||
"ListBatchesResponse",
|
||||
"RetrieveBatchRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"ListBenchmarksRequest",
|
||||
"ListBenchmarksResponse",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"ListDatasetsResponse",
|
||||
"ListModelsResponse",
|
||||
"ListOpenAIChatCompletionResponse",
|
||||
|
|
|
|||
|
|
@ -1,105 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str
|
||||
scoring_functions: list[str]
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str
|
||||
provider_id: str | None = None
|
||||
provider_benchmark_id: str | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
data: list[Benchmark]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def list_benchmarks(self) -> ListBenchmarksResponse:
|
||||
"""List all benchmarks.
|
||||
|
||||
:returns: A ListBenchmarksResponse.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
|
||||
async def get_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
) -> Benchmark:
|
||||
"""Get a benchmark by its ID.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to get.
|
||||
:returns: A Benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: list[str],
|
||||
provider_benchmark_id: str | None = None,
|
||||
provider_id: str | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""Register a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to register.
|
||||
:param dataset_id: The ID of the dataset to use for the benchmark.
|
||||
:param scoring_functions: The scoring functions to use for the benchmark.
|
||||
:param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
|
||||
:param provider_id: The ID of the provider to use for the benchmark.
|
||||
:param metadata: The metadata to use for the benchmark.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True)
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""Unregister a benchmark.
|
||||
|
||||
:param benchmark_id: The ID of the benchmark to unregister.
|
||||
"""
|
||||
...
|
||||
src/llama_stack_api/benchmarks/__init__.py (new file): 43 lines
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Benchmarks API protocol and models.
|
||||
|
||||
This module contains the Benchmarks protocol definition.
|
||||
Pydantic models are defined in llama_stack_api.benchmarks.models.
|
||||
The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes.
|
||||
"""
|
||||
|
||||
# Import fastapi_routes for router factory access
|
||||
from . import fastapi_routes
|
||||
|
||||
# Import protocol for re-export
|
||||
from .api import Benchmarks
|
||||
|
||||
# Import models for re-export
|
||||
from .models import (
|
||||
Benchmark,
|
||||
BenchmarkInput,
|
||||
CommonBenchmarkFields,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Benchmarks",
|
||||
"Benchmark",
|
||||
"BenchmarkInput",
|
||||
"CommonBenchmarkFields",
|
||||
"ListBenchmarksResponse",
|
||||
"ListBenchmarksRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"fastapi_routes",
|
||||
]
|
||||
src/llama_stack_api/benchmarks/api.py (new file): 39 lines
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
from .models import (
|
||||
Benchmark,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class Benchmarks(Protocol):
|
||||
async def list_benchmarks(
|
||||
self,
|
||||
request: ListBenchmarksRequest,
|
||||
) -> ListBenchmarksResponse: ...
|
||||
|
||||
async def get_benchmark(
|
||||
self,
|
||||
request: GetBenchmarkRequest,
|
||||
) -> Benchmark: ...
|
||||
|
||||
async def register_benchmark(
|
||||
self,
|
||||
request: RegisterBenchmarkRequest,
|
||||
) -> None: ...
|
||||
|
||||
async def unregister_benchmark(
|
||||
self,
|
||||
request: UnregisterBenchmarkRequest,
|
||||
) -> None: ...
|
||||
src/llama_stack_api/benchmarks/fastapi_routes.py (new file): 109 lines
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""FastAPI router for the Benchmarks API.
|
||||
|
||||
This module defines the FastAPI router for the Benchmarks API using standard
|
||||
FastAPI route decorators. The router is defined in the API package to keep
|
||||
all API-related code together.
|
||||
"""
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Body, Depends
|
||||
|
||||
from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses
|
||||
from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
from .api import Benchmarks
|
||||
from .models import (
|
||||
Benchmark,
|
||||
GetBenchmarkRequest,
|
||||
ListBenchmarksRequest,
|
||||
ListBenchmarksResponse,
|
||||
RegisterBenchmarkRequest,
|
||||
UnregisterBenchmarkRequest,
|
||||
)
|
||||
|
||||
# Automatically generate dependency functions from Pydantic models
|
||||
# This ensures the models are the single source of truth for descriptions
|
||||
get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest)
|
||||
get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest)
|
||||
get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest)
|
||||
|
||||
|
||||
def create_router(impl: Benchmarks) -> APIRouter:
|
||||
"""Create a FastAPI router for the Benchmarks API.
|
||||
|
||||
Args:
|
||||
impl: The Benchmarks implementation instance
|
||||
|
||||
Returns:
|
||||
APIRouter configured for the Benchmarks API
|
||||
"""
|
||||
router = APIRouter(
|
||||
prefix=f"/{LLAMA_STACK_API_V1ALPHA}",
|
||||
tags=["Benchmarks"],
|
||||
responses=standard_responses,
|
||||
)
|
||||
|
||||
@router.get(
|
||||
"/eval/benchmarks",
|
||||
response_model=ListBenchmarksResponse,
|
||||
summary="List all benchmarks.",
|
||||
description="List all benchmarks.",
|
||||
responses={
|
||||
200: {"description": "A ListBenchmarksResponse."},
|
||||
},
|
||||
)
|
||||
async def list_benchmarks(
|
||||
request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)],
|
||||
) -> ListBenchmarksResponse:
|
||||
return await impl.list_benchmarks(request)
|
||||
|
||||
@router.get(
|
||||
"/eval/benchmarks/{benchmark_id}",
|
||||
response_model=Benchmark,
|
||||
summary="Get a benchmark by its ID.",
|
||||
description="Get a benchmark by its ID.",
|
||||
responses={
|
||||
200: {"description": "A Benchmark."},
|
||||
},
|
||||
)
|
||||
async def get_benchmark(
|
||||
request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)],
|
||||
) -> Benchmark:
|
||||
return await impl.get_benchmark(request)
|
||||
|
||||
@router.post(
|
||||
"/eval/benchmarks",
|
||||
summary="Register a benchmark.",
|
||||
description="Register a benchmark.",
|
||||
responses={
|
||||
200: {"description": "The benchmark was successfully registered."},
|
||||
},
|
||||
deprecated=True,
|
||||
)
|
||||
async def register_benchmark(
|
||||
request: Annotated[RegisterBenchmarkRequest, Body(...)],
|
||||
) -> None:
|
||||
return await impl.register_benchmark(request)
|
||||
|
||||
@router.delete(
|
||||
"/eval/benchmarks/{benchmark_id}",
|
||||
summary="Unregister a benchmark.",
|
||||
description="Unregister a benchmark.",
|
||||
responses={
|
||||
200: {"description": "The benchmark was successfully unregistered."},
|
||||
},
|
||||
deprecated=True,
|
||||
)
|
||||
async def unregister_benchmark(
|
||||
request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)],
|
||||
) -> None:
|
||||
return await impl.unregister_benchmark(request)
|
||||
|
||||
return router
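For reference, a sketch of how the router factory above can be mounted on a plain FastAPI app. `StubBenchmarks` is a hypothetical toy implementation used purely for illustration, and the `Benchmark(...)` keyword arguments are assumed from the `BenchmarkWithOwner` construction shown in the routing-table diff; in the stack itself the real `BenchmarksRoutingTable` is wired in instead:

```python
# Minimal mounting sketch (not part of the PR).
from fastapi import FastAPI

from llama_stack_api.benchmarks import fastapi_routes
from llama_stack_api.benchmarks.models import (
    Benchmark,
    GetBenchmarkRequest,
    ListBenchmarksRequest,
    ListBenchmarksResponse,
    RegisterBenchmarkRequest,
    UnregisterBenchmarkRequest,
)


class StubBenchmarks:
    """Hypothetical in-memory implementation of the Benchmarks protocol."""

    def __init__(self) -> None:
        self._benchmarks: dict[str, Benchmark] = {}

    async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse:
        return ListBenchmarksResponse(data=list(self._benchmarks.values()))

    async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark:
        return self._benchmarks[request.benchmark_id]

    async def register_benchmark(self, request: RegisterBenchmarkRequest) -> None:
        # Field names assumed from the BenchmarkWithOwner construction above.
        self._benchmarks[request.benchmark_id] = Benchmark(
            identifier=request.benchmark_id,
            provider_id=request.provider_id or "stub",
            provider_resource_id=request.provider_benchmark_id or request.benchmark_id,
            dataset_id=request.dataset_id,
            scoring_functions=request.scoring_functions,
            metadata=request.metadata or {},
        )

    async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None:
        self._benchmarks.pop(request.benchmark_id, None)


app = FastAPI()
app.include_router(fastapi_routes.create_router(StubBenchmarks()))
# GET /v1alpha/eval/benchmarks now serves ListBenchmarksResponse from the stub.
```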
|
||||
src/llama_stack_api/benchmarks/models.py (new file): 109 lines
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
"""Pydantic models for Benchmarks API requests and responses.
|
||||
|
||||
This module defines the request and response models for the Benchmarks API
|
||||
using Pydantic with Field descriptions for OpenAPI schema generation.
|
||||
"""
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack_api.resource import Resource, ResourceType
|
||||
from llama_stack_api.schema_utils import json_schema_type
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksRequest(BaseModel):
|
||||
"""Request model for listing benchmarks."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class GetBenchmarkRequest(BaseModel):
|
||||
"""Request model for getting a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to get.")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RegisterBenchmarkRequest(BaseModel):
|
||||
"""Request model for registering a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to register.")
|
||||
dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.")
|
||||
scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.")
|
||||
provider_benchmark_id: str | None = Field(
|
||||
default=None, description="The ID of the provider benchmark to use for the benchmark."
|
||||
)
|
||||
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
|
||||
metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.")
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class UnregisterBenchmarkRequest(BaseModel):
|
||||
"""Request model for unregistering a benchmark."""
|
||||
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.")
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.")
|
||||
scoring_functions: list[str] = Field(
|
||||
..., description="List of scoring function identifiers to apply during evaluation."
|
||||
)
|
||||
metadata: dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task.",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance."""
|
||||
|
||||
type: Literal[ResourceType.benchmark] = Field(
|
||||
default=ResourceType.benchmark,
|
||||
description="The resource type, always benchmark.",
|
||||
)
|
||||
|
||||
@property
|
||||
def benchmark_id(self) -> str:
|
||||
return self.identifier
|
||||
|
||||
@property
|
||||
def provider_benchmark_id(self) -> str | None:
|
||||
return self.provider_resource_id
|
||||
|
||||
|
||||
class BenchmarkInput(CommonBenchmarkFields, BaseModel):
|
||||
benchmark_id: str = Field(..., description="The ID of the benchmark.")
|
||||
provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.")
|
||||
provider_benchmark_id: str | None = Field(
|
||||
default=None, description="The ID of the provider benchmark to use for the benchmark."
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBenchmarksResponse(BaseModel):
|
||||
"""Response containing a list of benchmark objects."""
|
||||
|
||||
data: list[Benchmark] = Field(..., description="List of benchmark objects.")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ListBenchmarksRequest",
|
||||
"GetBenchmarkRequest",
|
||||
"RegisterBenchmarkRequest",
|
||||
"UnregisterBenchmarkRequest",
|
||||
"CommonBenchmarkFields",
|
||||
"Benchmark",
|
||||
"BenchmarkInput",
|
||||
"ListBenchmarksResponse",
|
||||
]
|
||||
|
|
@ -22,14 +22,17 @@ from llama_stack_api import (
|
|||
Api,
|
||||
Dataset,
|
||||
DatasetPurpose,
|
||||
ListBenchmarksRequest,
|
||||
ListToolDefsResponse,
|
||||
Model,
|
||||
ModelNotFoundError,
|
||||
ModelType,
|
||||
NumberType,
|
||||
RegisterBenchmarkRequest,
|
||||
Shield,
|
||||
ToolDef,
|
||||
ToolGroup,
|
||||
UnregisterBenchmarkRequest,
|
||||
URIDataSource,
|
||||
)
|
||||
|
||||
|
|
@ -420,24 +423,26 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
|
|||
|
||||
# Register multiple benchmarks and verify listing
|
||||
await table.register_benchmark(
|
||||
benchmark_id="test-benchmark",
|
||||
dataset_id="test-dataset",
|
||||
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
|
||||
RegisterBenchmarkRequest(
|
||||
benchmark_id="test-benchmark",
|
||||
dataset_id="test-dataset",
|
||||
scoring_functions=["test-scoring-fn", "test-scoring-fn-2"],
|
||||
)
|
||||
)
|
||||
benchmarks = await table.list_benchmarks()
|
||||
benchmarks = await table.list_benchmarks(ListBenchmarksRequest())
|
||||
|
||||
assert len(benchmarks.data) == 1
|
||||
benchmark_ids = {b.identifier for b in benchmarks.data}
|
||||
assert "test-benchmark" in benchmark_ids
|
||||
|
||||
# Unregister the benchmark and verify removal
|
||||
await table.unregister_benchmark(benchmark_id="test-benchmark")
|
||||
benchmarks_after = await table.list_benchmarks()
|
||||
await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="test-benchmark"))
|
||||
benchmarks_after = await table.list_benchmarks(ListBenchmarksRequest())
|
||||
assert len(benchmarks_after.data) == 0
|
||||
|
||||
# Unregistering a non-existent benchmark should raise a clear error
|
||||
with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"):
|
||||
await table.unregister_benchmark(benchmark_id="dummy_benchmark")
|
||||
await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="dummy_benchmark"))
|
||||
|
||||
|
||||
async def test_tool_groups_routing_table(cached_disk_dist_registry):
|
||||
|
|
|
|||