diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 624443b00..61fa6705c 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -3404,7 +3404,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -3422,10 +3422,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -3445,20 +3445,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -3467,26 +3467,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -3495,7 +3497,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: @@ -10391,20 +10395,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. 
metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -10420,10 +10427,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -11385,33 +11394,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -12617,6 +12633,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml index 3f9766ae5..64f47d617 100644 --- a/docs/static/deprecated-llama-stack-spec.yaml +++ b/docs/static/deprecated-llama-stack-spec.yaml @@ -794,7 +794,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -812,10 +812,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. 
operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -835,20 +835,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -857,26 +857,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -885,7 +887,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true components: schemas: @@ -7375,20 +7379,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -7404,10 +7411,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -8369,33 +8378,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. 
provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -9601,6 +9617,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index 806972854..3a488073b 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -188,7 +188,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get /v1alpha/eval/benchmarks/{benchmark_id}: @@ -201,20 +201,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -223,7 +223,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: responses: @@ -6517,20 +6519,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. 
metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -6546,10 +6551,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -7346,6 +7353,45 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + description: The ID of the benchmark to register. + dataset_id: + type: string + title: Dataset Id + description: The ID of the dataset to use for the benchmark. + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + description: The scoring functions to use for the benchmark. + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider to use for the benchmark. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata to use for the benchmark. + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -8395,6 +8441,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b840ed798..6754f1773 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -9166,20 +9166,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -9195,10 +9198,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. 
type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -9848,6 +9853,45 @@ components: - $ref: '#/components/schemas/RowsDataSource' title: RowsDataSource title: URIDataSource | RowsDataSource + RegisterBenchmarkRequest: + properties: + benchmark_id: + type: string + title: Benchmark Id + description: The ID of the benchmark to register. + dataset_id: + type: string + title: Dataset Id + description: The ID of the dataset to use for the benchmark. + scoring_functions: + items: + type: string + type: array + title: Scoring Functions + description: The scoring functions to use for the benchmark. + provider_benchmark_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. + provider_id: + anyOf: + - type: string + - type: 'null' + description: The ID of the provider to use for the benchmark. + metadata: + anyOf: + - additionalProperties: true + type: object + - type: 'null' + description: The metadata to use for the benchmark. + type: object + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -11053,6 +11097,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 624443b00..61fa6705c 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -3404,7 +3404,7 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Benchmarks - summary: List Benchmarks + summary: List all benchmarks. description: List all benchmarks. operationId: list_benchmarks_v1alpha_eval_benchmarks_get post: @@ -3422,10 +3422,10 @@ paths: description: Default Response $ref: '#/components/responses/DefaultError' '204': - description: Successful Response + description: The benchmark was successfully registered. tags: - Benchmarks - summary: Register Benchmark + summary: Register a benchmark. description: Register a benchmark. 
operationId: register_benchmark_v1alpha_eval_benchmarks_post requestBody: @@ -3445,20 +3445,20 @@ paths: schema: $ref: '#/components/schemas/Benchmark' '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response tags: - Benchmarks - summary: Get Benchmark + summary: Get a benchmark by its ID. description: Get a benchmark by its ID. operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get parameters: @@ -3467,26 +3467,28 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to get. + title: Benchmark Id + description: The ID of the benchmark to get. delete: responses: '400': - description: Bad Request $ref: '#/components/responses/BadRequest400' + description: Bad Request '429': - description: Too Many Requests $ref: '#/components/responses/TooManyRequests429' + description: Too Many Requests '500': - description: Internal Server Error $ref: '#/components/responses/InternalServerError500' + description: Internal Server Error default: - description: Default Response $ref: '#/components/responses/DefaultError' + description: Default Response '204': - description: Successful Response + description: The benchmark was successfully unregistered. tags: - Benchmarks - summary: Unregister Benchmark + summary: Unregister a benchmark. description: Unregister a benchmark. operationId: unregister_benchmark_v1alpha_eval_benchmarks__benchmark_id__delete parameters: @@ -3495,7 +3497,9 @@ paths: required: true schema: type: string - description: 'Path parameter: benchmark_id' + description: The ID of the benchmark to unregister. + title: Benchmark Id + description: The ID of the benchmark to unregister. deprecated: true /v1alpha/eval/benchmarks/{benchmark_id}/evaluations: post: @@ -10391,20 +10395,23 @@ components: type: string const: benchmark title: Type + description: The resource type, always benchmark. default: benchmark dataset_id: type: string title: Dataset Id + description: Identifier of the dataset to use for the benchmark evaluation. scoring_functions: items: type: string type: array title: Scoring Functions + description: List of scoring function identifiers to apply during evaluation. metadata: additionalProperties: true type: object title: Metadata - description: Metadata for this evaluation task + description: Metadata for this evaluation task. type: object required: - identifier @@ -10420,10 +10427,12 @@ components: $ref: '#/components/schemas/Benchmark' type: array title: Data + description: List of benchmark objects. type: object required: - data title: ListBenchmarksResponse + description: Response containing a list of benchmark objects. BenchmarkConfig: properties: eval_candidate: @@ -11385,33 +11394,40 @@ components: benchmark_id: type: string title: Benchmark Id + description: The ID of the benchmark to register. dataset_id: type: string title: Dataset Id + description: The ID of the dataset to use for the benchmark. scoring_functions: items: type: string type: array title: Scoring Functions + description: The scoring functions to use for the benchmark. 
provider_benchmark_id: anyOf: - type: string - type: 'null' + description: The ID of the provider benchmark to use for the benchmark. provider_id: anyOf: - type: string - type: 'null' + description: The ID of the provider to use for the benchmark. metadata: anyOf: - additionalProperties: true type: object - type: 'null' + description: The metadata to use for the benchmark. type: object required: - benchmark_id - dataset_id - scoring_functions title: RegisterBenchmarkRequest + description: Request model for registering a benchmark. AllowedToolsFilter: properties: tool_names: @@ -12617,6 +12633,33 @@ components: - batch_id title: CancelBatchRequest type: object + ListBenchmarksRequest: + description: Request model for listing benchmarks. + properties: {} + title: ListBenchmarksRequest + type: object + GetBenchmarkRequest: + description: Request model for getting a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to get. + title: Benchmark Id + type: string + required: + - benchmark_id + title: GetBenchmarkRequest + type: object + UnregisterBenchmarkRequest: + description: Request model for unregistering a benchmark. + properties: + benchmark_id: + description: The ID of the benchmark to unregister. + title: Benchmark Id + type: string + required: + - benchmark_id + title: UnregisterBenchmarkRequest + type: object DialogType: description: Parameter type for dialog data with semantic output labels. properties: diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 966723d9b..b3ca816cf 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -9,6 +9,7 @@ from importlib.metadata import version from pydantic import BaseModel from llama_stack.core.datatypes import StackConfig +from llama_stack.core.distribution import builtin_automatically_routed_apis from llama_stack.core.external import load_external_apis from llama_stack.core.server.fastapi_router_registry import ( _ROUTER_FACTORIES, @@ -65,6 +66,17 @@ class DistributionInspectImpl(Inspect): def _get_provider_types(api: Api) -> list[str]: if api.value in ["providers", "inspect"]: return [] # These APIs don't have "real" providers they're internal to the stack + + # For routing table APIs, look up providers from their router API + # (e.g., benchmarks -> eval, models -> inference, etc.) 
+ auto_routed_apis = builtin_automatically_routed_apis() + for auto_routed in auto_routed_apis: + if auto_routed.routing_table_api == api: + # This is a routing table API, use its router API for providers + providers = config.providers.get(auto_routed.router_api.value, []) + return [p.provider_type for p in providers] if providers else [] + + # Regular API, look up providers directly providers = config.providers.get(api.value, []) return [p.provider_type for p in providers] if providers else [] diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index 44545fa55..9d2ed3953 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -10,6 +10,7 @@ import json import logging # allow-direct-logging import os import sys +import typing from enum import Enum from io import BytesIO from pathlib import Path @@ -490,6 +491,25 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): unwrapped_body_param = param break + # Check for parameters with Depends() annotation (FastAPI router endpoints) + # These need special handling: construct the request model from body + depends_param = None + for param in params_list: + param_type = param.annotation + if get_origin(param_type) is typing.Annotated: + args = get_args(param_type) + if len(args) > 1: + # Check if any metadata is Depends + metadata = args[1:] + for item in metadata: + # Check if it's a Depends object (has dependency attribute or is a callable) + # Depends objects typically have a 'dependency' attribute or are callable functions + if hasattr(item, "dependency") or callable(item) or "Depends" in str(type(item)): + depends_param = param + break + if depends_param: + break + # Convert parameters to Pydantic models where needed converted_body = {} for param_name, param in sig.parameters.items(): @@ -500,6 +520,27 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): else: converted_body[param_name] = convert_to_pydantic(param.annotation, value) + # Handle Depends parameter: construct request model from body + if depends_param and depends_param.name not in converted_body: + param_type = depends_param.annotation + if get_origin(param_type) is typing.Annotated: + base_type = get_args(param_type)[0] + # Handle Union types (e.g., SomeRequestModel | None) - extract the non-None type + # In Python 3.10+, Union types created with | syntax are still typing.Union + origin = get_origin(base_type) + if origin is Union: + # Get the first non-None type from the Union + union_args = get_args(base_type) + base_type = next( + (t for t in union_args if t is not type(None) and t is not None), + union_args[0] if union_args else None, + ) + + # Only try to instantiate if it's a class (not a Union or other non-callable type) + if base_type is not None and inspect.isclass(base_type) and callable(base_type): + # Construct the request model from all body parameters + converted_body[depends_param.name] = base_type(**body) + # handle unwrapped body parameter after processing all named parameters if unwrapped_body_param: base_type = get_args(unwrapped_body_param.annotation)[0] diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py index 9037ffe8b..d5e3799ba 100644 --- a/src/llama_stack/core/routing_tables/benchmarks.py +++ b/src/llama_stack/core/routing_tables/benchmarks.py @@ -4,13 +4,20 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any from llama_stack.core.datatypes import ( BenchmarkWithOwner, ) from llama_stack.log import get_logger -from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack_api import ( + Benchmark, + Benchmarks, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) from .common import CommonRoutingTableImpl @@ -18,26 +25,21 @@ logger = get_logger(name=__name__, category="core::routing_tables") class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): - async def list_benchmarks(self) -> ListBenchmarksResponse: + async def list_benchmarks(self, request: ListBenchmarksRequest) -> ListBenchmarksResponse: return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - async def get_benchmark(self, benchmark_id: str) -> Benchmark: - benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + async def get_benchmark(self, request: GetBenchmarkRequest) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", request.benchmark_id) if benchmark is None: - raise ValueError(f"Benchmark '{benchmark_id}' not found") + raise ValueError(f"Benchmark '{request.benchmark_id}' not found") return benchmark async def register_benchmark( self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - metadata: dict[str, Any] | None = None, - provider_benchmark_id: str | None = None, - provider_id: str | None = None, + request: RegisterBenchmarkRequest, ) -> None: - if metadata is None: - metadata = {} + metadata = request.metadata if request.metadata is not None else {} + provider_id = request.provider_id if provider_id is None: if len(self.impls_by_provider_id) == 1: provider_id = list(self.impls_by_provider_id.keys())[0] @@ -45,18 +47,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): raise ValueError( "No provider specified and multiple providers available. Please specify a provider_id." 
) + provider_benchmark_id = request.provider_benchmark_id if provider_benchmark_id is None: - provider_benchmark_id = benchmark_id + provider_benchmark_id = request.benchmark_id benchmark = BenchmarkWithOwner( - identifier=benchmark_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, + identifier=request.benchmark_id, + dataset_id=request.dataset_id, + scoring_functions=request.scoring_functions, metadata=metadata, provider_id=provider_id, provider_resource_id=provider_benchmark_id, ) await self.register_object(benchmark) - async def unregister_benchmark(self, benchmark_id: str) -> None: - existing_benchmark = await self.get_benchmark(benchmark_id) + async def unregister_benchmark(self, request: UnregisterBenchmarkRequest) -> None: + get_request = GetBenchmarkRequest(benchmark_id=request.benchmark_id) + existing_benchmark = await self.get_benchmark(get_request) await self.unregister_object(existing_benchmark) diff --git a/src/llama_stack/core/server/fastapi_router_registry.py b/src/llama_stack/core/server/fastapi_router_registry.py index 4119cc847..4ca1a9837 100644 --- a/src/llama_stack/core/server/fastapi_router_registry.py +++ b/src/llama_stack/core/server/fastapi_router_registry.py @@ -17,7 +17,7 @@ from fastapi import APIRouter from fastapi.routing import APIRoute from starlette.routing import Route -from llama_stack_api import batches +from llama_stack_api import batches, benchmarks # Router factories for APIs that have FastAPI routers # Add new APIs here as they are migrated to the router system @@ -25,6 +25,7 @@ from llama_stack_api.datatypes import Api _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = { "batches": batches.fastapi_routes.create_router, + "benchmarks": benchmarks.fastapi_routes.create_router, } diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 9df9e4a60..abefe3256 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -13,6 +13,11 @@ from aiohttp import hdrs from starlette.routing import Route from llama_stack.core.resolver import api_protocol_map +from llama_stack.core.server.fastapi_router_registry import ( + _ROUTER_FACTORIES, + build_fastapi_router, + get_router_routes, +) from llama_stack_api import Api, ExternalApiSpec, WebMethod EndpointFunc = Callable[..., Any] @@ -85,7 +90,53 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No return f"^{pattern}$" + # Process routes from FastAPI routers + for api_name in _ROUTER_FACTORIES.keys(): + api = Api(api_name) + if api not in impls: + continue + impl = impls[api] + router = build_fastapi_router(api, impl) + if router: + router_routes = get_router_routes(router) + for route in router_routes: + # Get the endpoint function from the route + # For FastAPI routes, the endpoint is the actual function + func = route.endpoint + if func is None: + continue + + # Get the first (and typically only) method from the set, filtering out HEAD + available_methods = [m for m in (route.methods or []) if m != "HEAD"] + if not available_methods: + continue # Skip if only HEAD method is available + method = available_methods[0].lower() + + if method not in route_impls: + route_impls[method] = {} + + # Create a minimal WebMethod for router routes (needed for RouteMatch tuple) + # We don't have webmethod metadata for router routes, so create a minimal one + # that has the attributes used by the library client (descriptive_name for tracing) + # + # TODO: Long-term migration plan (once all APIs are 
migrated to FastAPI routers): + # - Extract summary from APIRoute: route.summary (available on FastAPI APIRoute objects) + # - Pass summary directly in RouteMatch instead of WebMethod + # - Remove this WebMethod() instantiation entirely + # - Update library_client.py to use the extracted summary instead of webmethod.descriptive_name + webmethod = WebMethod(descriptive_name=None) + route_impls[method][_convert_path_to_regex(route.path)] = ( + func, + route.path, + webmethod, + ) + + # Process routes from legacy webmethod-based APIs for api, api_routes in api_to_routes.items(): + # Skip APIs that have routers (already processed above) + if api.value in _ROUTER_FACTORIES: + continue + if api not in impls: continue for route, webmethod in api_routes: diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 9310bce41..eade24c1b 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -6,12 +6,14 @@ import asyncio import importlib.resources +import inspect import os import re import tempfile -from typing import Any +from typing import Any, get_type_hints import yaml +from pydantic import BaseModel from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackConfig, VectorStoresConfig @@ -108,6 +110,81 @@ REGISTRY_REFRESH_TASK = None TEST_RECORDING_CONTEXT = None +def is_request_model(t: Any) -> bool: + """Check if a type is a request model (Pydantic BaseModel). + + Args: + t: The type to check + + Returns: + True if the type is a Pydantic BaseModel subclass, False otherwise + """ + + return inspect.isclass(t) and issubclass(t, BaseModel) + + +async def invoke_with_optional_request(method: Any) -> Any: + """Invoke a method, automatically creating a request instance if needed. + + For APIs that use request models, this will create an empty request object. + For backward compatibility, falls back to calling without arguments. + + Uses get_type_hints() to resolve forward references (e.g., "ListBenchmarksRequest" -> actual class). 
+ + Handles methods with: + - No parameters: calls without arguments + - One or more request model parameters: creates empty instances for each + - Mixed parameters: creates request models, uses defaults for others + - Required non-request-model parameters without defaults: falls back to calling without arguments + + Args: + method: The method to invoke + + Returns: + The result of calling the method + """ + try: + hints = get_type_hints(method) + except Exception: + # Forward references can't be resolved, fall back to calling without request + return await method() + + params = list(inspect.signature(method).parameters.values()) + params = [p for p in params if p.name != "self"] + + if not params: + return await method() + + # Build arguments for the method call + args: dict[str, Any] = {} + can_call = True + + for param in params: + param_type = hints.get(param.name) + + # If it's a request model, try to create an empty instance + if param_type and is_request_model(param_type): + try: + args[param.name] = param_type() + except Exception: + # Request model requires arguments, can't create empty instance + can_call = False + break + # If it has a default value, we can skip it (will use default) + elif param.default != inspect.Parameter.empty: + continue + # Required parameter that's not a request model - can't provide it + else: + can_call = False + break + + if can_call and args: + return await method(**args) + + # Fall back to calling without arguments for backward compatibility + return await method() + + async def register_resources(run_config: StackConfig, impls: dict[Api, Any]): for rsrc, api, register_method, list_method in RESOURCES: objects = getattr(run_config.registered_resources, rsrc) @@ -129,7 +206,7 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]): await method(**{k: getattr(obj, k) for k in obj.model_dump().keys()}) method = getattr(impls[api], list_method) - response = await method() + response = await invoke_with_optional_request(method) objects_to_process = response.data if hasattr(response, "data") else response diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py index 407629eaa..e38f3cce6 100644 --- a/src/llama_stack_api/__init__.py +++ b/src/llama_stack_api/__init__.py @@ -40,7 +40,11 @@ from .benchmarks import ( BenchmarkInput, Benchmarks, CommonBenchmarkFields, + GetBenchmarkRequest, + ListBenchmarksRequest, ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, ) # Import commonly used types from common submodule @@ -567,7 +571,11 @@ __all__ = [ "LLMRAGQueryGeneratorConfig", "ListBatchesResponse", "RetrieveBatchRequest", + "GetBenchmarkRequest", + "ListBenchmarksRequest", "ListBenchmarksResponse", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", "ListDatasetsResponse", "ListModelsResponse", "ListOpenAIChatCompletionResponse", diff --git a/src/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py deleted file mode 100644 index fdb2ccad4..000000000 --- a/src/llama_stack_api/benchmarks.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-from typing import Any, Literal, Protocol, runtime_checkable - -from pydantic import BaseModel, Field - -from llama_stack_api.resource import Resource, ResourceType -from llama_stack_api.schema_utils import json_schema_type, webmethod -from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA - - -class CommonBenchmarkFields(BaseModel): - dataset_id: str - scoring_functions: list[str] - metadata: dict[str, Any] = Field( - default_factory=dict, - description="Metadata for this evaluation task", - ) - - -@json_schema_type -class Benchmark(CommonBenchmarkFields, Resource): - """A benchmark resource for evaluating model performance. - - :param dataset_id: Identifier of the dataset to use for the benchmark evaluation - :param scoring_functions: List of scoring function identifiers to apply during evaluation - :param metadata: Metadata for this evaluation task - :param type: The resource type, always benchmark - """ - - type: Literal[ResourceType.benchmark] = ResourceType.benchmark - - @property - def benchmark_id(self) -> str: - return self.identifier - - @property - def provider_benchmark_id(self) -> str | None: - return self.provider_resource_id - - -class BenchmarkInput(CommonBenchmarkFields, BaseModel): - benchmark_id: str - provider_id: str | None = None - provider_benchmark_id: str | None = None - - -@json_schema_type -class ListBenchmarksResponse(BaseModel): - data: list[Benchmark] - - -@runtime_checkable -class Benchmarks(Protocol): - @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def list_benchmarks(self) -> ListBenchmarksResponse: - """List all benchmarks. - - :returns: A ListBenchmarksResponse. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) - async def get_benchmark( - self, - benchmark_id: str, - ) -> Benchmark: - """Get a benchmark by its ID. - - :param benchmark_id: The ID of the benchmark to get. - :returns: A Benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) - async def register_benchmark( - self, - benchmark_id: str, - dataset_id: str, - scoring_functions: list[str], - provider_benchmark_id: str | None = None, - provider_id: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> None: - """Register a benchmark. - - :param benchmark_id: The ID of the benchmark to register. - :param dataset_id: The ID of the dataset to use for the benchmark. - :param scoring_functions: The scoring functions to use for the benchmark. - :param provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. - :param provider_id: The ID of the provider to use for the benchmark. - :param metadata: The metadata to use for the benchmark. - """ - ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA, deprecated=True) - async def unregister_benchmark(self, benchmark_id: str) -> None: - """Unregister a benchmark. - - :param benchmark_id: The ID of the benchmark to unregister. - """ - ... diff --git a/src/llama_stack_api/benchmarks/__init__.py b/src/llama_stack_api/benchmarks/__init__.py new file mode 100644 index 000000000..9c5652dce --- /dev/null +++ b/src/llama_stack_api/benchmarks/__init__.py @@ -0,0 +1,43 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +"""Benchmarks API protocol and models. + +This module contains the Benchmarks protocol definition. +Pydantic models are defined in llama_stack_api.benchmarks.models. +The FastAPI router is defined in llama_stack_api.benchmarks.fastapi_routes. +""" + +# Import fastapi_routes for router factory access +from . import fastapi_routes + +# Import protocol for re-export +from .api import Benchmarks + +# Import models for re-export +from .models import ( + Benchmark, + BenchmarkInput, + CommonBenchmarkFields, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + +__all__ = [ + "Benchmarks", + "Benchmark", + "BenchmarkInput", + "CommonBenchmarkFields", + "ListBenchmarksResponse", + "ListBenchmarksRequest", + "GetBenchmarkRequest", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", + "fastapi_routes", +] diff --git a/src/llama_stack_api/benchmarks/api.py b/src/llama_stack_api/benchmarks/api.py new file mode 100644 index 000000000..26f88dbe2 --- /dev/null +++ b/src/llama_stack_api/benchmarks/api.py @@ -0,0 +1,39 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Protocol, runtime_checkable + +from .models import ( + Benchmark, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + + +@runtime_checkable +class Benchmarks(Protocol): + async def list_benchmarks( + self, + request: ListBenchmarksRequest, + ) -> ListBenchmarksResponse: ... + + async def get_benchmark( + self, + request: GetBenchmarkRequest, + ) -> Benchmark: ... + + async def register_benchmark( + self, + request: RegisterBenchmarkRequest, + ) -> None: ... + + async def unregister_benchmark( + self, + request: UnregisterBenchmarkRequest, + ) -> None: ... diff --git a/src/llama_stack_api/benchmarks/fastapi_routes.py b/src/llama_stack_api/benchmarks/fastapi_routes.py new file mode 100644 index 000000000..461939ab9 --- /dev/null +++ b/src/llama_stack_api/benchmarks/fastapi_routes.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""FastAPI router for the Benchmarks API. + +This module defines the FastAPI router for the Benchmarks API using standard +FastAPI route decorators. The router is defined in the API package to keep +all API-related code together. 
+""" + +from typing import Annotated + +from fastapi import APIRouter, Body, Depends + +from llama_stack_api.router_utils import create_path_dependency, create_query_dependency, standard_responses +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA + +from .api import Benchmarks +from .models import ( + Benchmark, + GetBenchmarkRequest, + ListBenchmarksRequest, + ListBenchmarksResponse, + RegisterBenchmarkRequest, + UnregisterBenchmarkRequest, +) + +# Automatically generate dependency functions from Pydantic models +# This ensures the models are the single source of truth for descriptions +get_list_benchmarks_request = create_query_dependency(ListBenchmarksRequest) +get_get_benchmark_request = create_path_dependency(GetBenchmarkRequest) +get_unregister_benchmark_request = create_path_dependency(UnregisterBenchmarkRequest) + + +def create_router(impl: Benchmarks) -> APIRouter: + """Create a FastAPI router for the Benchmarks API. + + Args: + impl: The Benchmarks implementation instance + + Returns: + APIRouter configured for the Benchmarks API + """ + router = APIRouter( + prefix=f"/{LLAMA_STACK_API_V1ALPHA}", + tags=["Benchmarks"], + responses=standard_responses, + ) + + @router.get( + "/eval/benchmarks", + response_model=ListBenchmarksResponse, + summary="List all benchmarks.", + description="List all benchmarks.", + responses={ + 200: {"description": "A ListBenchmarksResponse."}, + }, + ) + async def list_benchmarks( + request: Annotated[ListBenchmarksRequest, Depends(get_list_benchmarks_request)], + ) -> ListBenchmarksResponse: + return await impl.list_benchmarks(request) + + @router.get( + "/eval/benchmarks/{benchmark_id}", + response_model=Benchmark, + summary="Get a benchmark by its ID.", + description="Get a benchmark by its ID.", + responses={ + 200: {"description": "A Benchmark."}, + }, + ) + async def get_benchmark( + request: Annotated[GetBenchmarkRequest, Depends(get_get_benchmark_request)], + ) -> Benchmark: + return await impl.get_benchmark(request) + + @router.post( + "/eval/benchmarks", + summary="Register a benchmark.", + description="Register a benchmark.", + responses={ + 200: {"description": "The benchmark was successfully registered."}, + }, + deprecated=True, + ) + async def register_benchmark( + request: Annotated[RegisterBenchmarkRequest, Body(...)], + ) -> None: + return await impl.register_benchmark(request) + + @router.delete( + "/eval/benchmarks/{benchmark_id}", + summary="Unregister a benchmark.", + description="Unregister a benchmark.", + responses={ + 200: {"description": "The benchmark was successfully unregistered."}, + }, + deprecated=True, + ) + async def unregister_benchmark( + request: Annotated[UnregisterBenchmarkRequest, Depends(get_unregister_benchmark_request)], + ) -> None: + return await impl.unregister_benchmark(request) + + return router diff --git a/src/llama_stack_api/benchmarks/models.py b/src/llama_stack_api/benchmarks/models.py new file mode 100644 index 000000000..4d9eeb8c8 --- /dev/null +++ b/src/llama_stack_api/benchmarks/models.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +"""Pydantic models for Benchmarks API requests and responses. + +This module defines the request and response models for the Benchmarks API +using Pydantic with Field descriptions for OpenAPI schema generation. 
+""" + +from typing import Any, Literal + +from pydantic import BaseModel, Field + +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type + + +@json_schema_type +class ListBenchmarksRequest(BaseModel): + """Request model for listing benchmarks.""" + + pass + + +@json_schema_type +class GetBenchmarkRequest(BaseModel): + """Request model for getting a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to get.") + + +@json_schema_type +class RegisterBenchmarkRequest(BaseModel): + """Request model for registering a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to register.") + dataset_id: str = Field(..., description="The ID of the dataset to use for the benchmark.") + scoring_functions: list[str] = Field(..., description="The scoring functions to use for the benchmark.") + provider_benchmark_id: str | None = Field( + default=None, description="The ID of the provider benchmark to use for the benchmark." + ) + provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.") + metadata: dict[str, Any] | None = Field(default=None, description="The metadata to use for the benchmark.") + + +@json_schema_type +class UnregisterBenchmarkRequest(BaseModel): + """Request model for unregistering a benchmark.""" + + benchmark_id: str = Field(..., description="The ID of the benchmark to unregister.") + + +class CommonBenchmarkFields(BaseModel): + dataset_id: str = Field(..., description="Identifier of the dataset to use for the benchmark evaluation.") + scoring_functions: list[str] = Field( + ..., description="List of scoring function identifiers to apply during evaluation." + ) + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Metadata for this evaluation task.", + ) + + +@json_schema_type +class Benchmark(CommonBenchmarkFields, Resource): + """A benchmark resource for evaluating model performance.""" + + type: Literal[ResourceType.benchmark] = Field( + default=ResourceType.benchmark, + description="The resource type, always benchmark.", + ) + + @property + def benchmark_id(self) -> str: + return self.identifier + + @property + def provider_benchmark_id(self) -> str | None: + return self.provider_resource_id + + +class BenchmarkInput(CommonBenchmarkFields, BaseModel): + benchmark_id: str = Field(..., description="The ID of the benchmark.") + provider_id: str | None = Field(default=None, description="The ID of the provider to use for the benchmark.") + provider_benchmark_id: str | None = Field( + default=None, description="The ID of the provider benchmark to use for the benchmark." 
+ ) + + +@json_schema_type +class ListBenchmarksResponse(BaseModel): + """Response containing a list of benchmark objects.""" + + data: list[Benchmark] = Field(..., description="List of benchmark objects.") + + +__all__ = [ + "ListBenchmarksRequest", + "GetBenchmarkRequest", + "RegisterBenchmarkRequest", + "UnregisterBenchmarkRequest", + "CommonBenchmarkFields", + "Benchmark", + "BenchmarkInput", + "ListBenchmarksResponse", +] diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 292ee8384..676d786c2 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -22,14 +22,17 @@ from llama_stack_api import ( Api, Dataset, DatasetPurpose, + ListBenchmarksRequest, ListToolDefsResponse, Model, ModelNotFoundError, ModelType, NumberType, + RegisterBenchmarkRequest, Shield, ToolDef, ToolGroup, + UnregisterBenchmarkRequest, URIDataSource, ) @@ -420,24 +423,26 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry): # Register multiple benchmarks and verify listing await table.register_benchmark( - benchmark_id="test-benchmark", - dataset_id="test-dataset", - scoring_functions=["test-scoring-fn", "test-scoring-fn-2"], + RegisterBenchmarkRequest( + benchmark_id="test-benchmark", + dataset_id="test-dataset", + scoring_functions=["test-scoring-fn", "test-scoring-fn-2"], + ) ) - benchmarks = await table.list_benchmarks() + benchmarks = await table.list_benchmarks(ListBenchmarksRequest()) assert len(benchmarks.data) == 1 benchmark_ids = {b.identifier for b in benchmarks.data} assert "test-benchmark" in benchmark_ids # Unregister the benchmark and verify removal - await table.unregister_benchmark(benchmark_id="test-benchmark") - benchmarks_after = await table.list_benchmarks() + await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="test-benchmark")) + benchmarks_after = await table.list_benchmarks(ListBenchmarksRequest()) assert len(benchmarks_after.data) == 0 # Unregistering a non-existent benchmark should raise a clear error with pytest.raises(ValueError, match="Benchmark 'dummy_benchmark' not found"): - await table.unregister_benchmark(benchmark_id="dummy_benchmark") + await table.unregister_benchmark(UnregisterBenchmarkRequest(benchmark_id="dummy_benchmark")) async def test_tool_groups_routing_table(cached_disk_dist_registry):