diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 16847f542..68f27ef3b 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -69,11 +69,12 @@
                 "tags": [
                     "DatasetIO"
                 ],
-                "description": "",
+                "description": "Get a paginated list of rows from a dataset.",
                 "parameters": [
                     {
                         "name": "dataset_id",
                         "in": "query",
+                        "description": "The ID of the dataset to get the rows from.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -82,6 +83,7 @@
                     {
                         "name": "rows_in_page",
                         "in": "query",
+                        "description": "The number of rows to get per page.",
                         "required": true,
                         "schema": {
                             "type": "integer"
@@ -90,6 +92,7 @@
                     {
                         "name": "page_token",
                         "in": "query",
+                        "description": "The token to get the next page of rows.",
                         "required": false,
                         "schema": {
                             "type": "string"
@@ -98,6 +101,7 @@
                     {
                         "name": "filter_condition",
                         "in": "query",
+                        "description": "(Optional) A condition to filter the rows by.",
                         "required": false,
                         "schema": {
                             "type": "string"
@@ -896,7 +900,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "EvaluateResponse object containing generations and scores",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -921,11 +925,12 @@
                 "tags": [
                     "Eval"
                 ],
-                "description": "",
+                "description": "Evaluate a list of rows on a benchmark.",
                 "parameters": [
                     {
                         "name": "benchmark_id",
                         "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2121,7 +2126,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The status of the evaluation job.",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -2153,11 +2158,12 @@
                 "tags": [
                     "Eval"
                 ],
-                "description": "",
+                "description": "Get the status of a job.",
                 "parameters": [
                     {
                         "name": "benchmark_id",
                         "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2166,6 +2172,7 @@
                     {
                         "name": "job_id",
                         "in": "path",
+                        "description": "The ID of the job to get the status of.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2194,11 +2201,12 @@
                 "tags": [
                     "Eval"
                 ],
-                "description": "",
+                "description": "Cancel a job.",
                 "parameters": [
                     {
                         "name": "benchmark_id",
                         "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2207,6 +2215,7 @@
                     {
                         "name": "job_id",
                         "in": "path",
+                        "description": "The ID of the job to cancel.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2219,7 +2228,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The result of the job.",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -2244,11 +2253,12 @@
                 "tags": [
                     "Eval"
                 ],
-                "description": "",
+                "description": "Get the result of a job.",
                 "parameters": [
                     {
                         "name": "benchmark_id",
                         "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -2257,6 +2267,7 @@
                     {
                         "name": "job_id",
                         "in": "path",
+                        "description": "The ID of the job to get the result of.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -3287,7 +3298,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The job that was created to run the evaluation.",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -3312,11 +3323,12 @@
                 "tags": [
                     "Eval"
                 ],
-                "description": "",
+                "description": "Run an evaluation on a benchmark.",
                 "parameters": [
                     {
                         "name": "benchmark_id",
                         "in": "path",
+                        "description": "The ID of the benchmark to run the evaluation on.",
                         "required": true,
                         "schema": {
                             "type": "string"
@@ -3418,7 +3430,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "ScoreResponse object containing rows and aggregated results",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -3443,7 +3455,7 @@
                "tags": [
                    "Scoring"
                ],
-                "description": "",
+                "description": "Score a list of rows.",
                "parameters": [],
                "requestBody": {
                    "content": {
@@ -6187,7 +6199,8 @@
                    "default": "agent"
                },
                "config": {
-                    "$ref": "#/components/schemas/AgentConfig"
+                    "$ref": "#/components/schemas/AgentConfig",
+                    "description": "The configuration for the agent candidate."
                }
            },
            "additionalProperties": false,
@@ -6195,7 +6208,8 @@
                "type",
                "config"
            ],
-            "title": "AgentCandidate"
+            "title": "AgentCandidate",
+            "description": "An agent candidate for evaluation."
        },
        "AggregationFunctionType": {
            "type": "string",
@@ -6232,16 +6246,19 @@
            "type": "object",
            "properties": {
                "eval_candidate": {
-                    "$ref": "#/components/schemas/EvalCandidate"
+                    "$ref": "#/components/schemas/EvalCandidate",
+                    "description": "The candidate to evaluate."
                },
                "scoring_params": {
                    "type": "object",
                    "additionalProperties": {
                        "$ref": "#/components/schemas/ScoringFnParams"
-                    }
+                    },
+                    "description": "Map between scoring function id and parameters for each scoring function you want to run"
                },
                "num_examples": {
-                    "type": "integer"
+                    "type": "integer",
+                    "description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
                }
            },
            "additionalProperties": false,
@@ -6249,7 +6266,8 @@
                "eval_candidate",
                "scoring_params"
            ],
-            "title": "BenchmarkConfig"
+            "title": "BenchmarkConfig",
+            "description": "A benchmark configuration for evaluation."
        },
        "EvalCandidate": {
            "oneOf": [
@@ -6311,13 +6329,16 @@
                    "default": "model"
                },
                "model": {
-                    "type": "string"
+                    "type": "string",
+                    "description": "The model ID to evaluate."
                },
                "sampling_params": {
-                    "$ref": "#/components/schemas/SamplingParams"
+                    "$ref": "#/components/schemas/SamplingParams",
+                    "description": "The sampling parameters for the model."
                },
                "system_message": {
-                    "$ref": "#/components/schemas/SystemMessage"
+                    "$ref": "#/components/schemas/SystemMessage",
+                    "description": "(Optional) The system message providing instructions or context to the model."
                }
            },
            "additionalProperties": false,
@@ -6326,7 +6347,8 @@
                "type",
                "model",
                "sampling_params"
            ],
-            "title": "ModelCandidate"
+            "title": "ModelCandidate",
+            "description": "A model candidate for evaluation."
        },
        "RegexParserScoringFnParams": {
            "type": "object",
@@ -6405,16 +6427,19 @@
                                }
                            ]
                        }
-                    }
+                    },
+                    "description": "The rows to evaluate."
                },
                "scoring_functions": {
                    "type": "array",
                    "items": {
                        "type": "string"
-                    }
+                    },
+                    "description": "The scoring functions to use for the evaluation."
                },
                "benchmark_config": {
-                    "$ref": "#/components/schemas/BenchmarkConfig"
+                    "$ref": "#/components/schemas/BenchmarkConfig",
+                    "description": "The configuration for the benchmark."
                }
            },
            "additionalProperties": false,
@@ -6454,13 +6479,15 @@
                                }
                            ]
                        }
-                    }
+                    },
+                    "description": "The generations from the evaluation."
                },
                "scores": {
                    "type": "object",
                    "additionalProperties": {
                        "$ref": "#/components/schemas/ScoringResult"
-                    }
+                    },
+                    "description": "The scores from the evaluation."
                }
            },
            "additionalProperties": false,
@@ -6468,7 +6495,8 @@
                "generations",
                "scores"
            ],
-            "title": "EvaluateResponse"
+            "title": "EvaluateResponse",
+            "description": "The response from an evaluation."
}, "ScoringResult": { "type": "object", @@ -6499,7 +6527,8 @@ } ] } - } + }, + "description": "The scoring result for each row. Each row is a map of column name to value." }, "aggregated_results": { "type": "object", @@ -6524,7 +6553,8 @@ "type": "object" } ] - } + }, + "description": "Map of metric name to aggregated value" } }, "additionalProperties": false, @@ -6532,7 +6562,8 @@ "score_rows", "aggregated_results" ], - "title": "ScoringResult" + "title": "ScoringResult", + "description": "A scoring result for a single row." }, "Session": { "type": "object", @@ -7021,13 +7052,16 @@ } ] } - } + }, + "description": "The rows in the current page." }, "total_count": { - "type": "integer" + "type": "integer", + "description": "The total number of rows in the dataset." }, "next_page_token": { - "type": "string" + "type": "string", + "description": "The token to get the next page of rows." } }, "additionalProperties": false, @@ -7035,7 +7069,8 @@ "rows", "total_count" ], - "title": "PaginatedRowsResult" + "title": "PaginatedRowsResult", + "description": "A paginated list of rows from a dataset." }, "ScoringFn": { "type": "object", @@ -9307,7 +9342,8 @@ "type": "object", "properties": { "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig" + "$ref": "#/components/schemas/BenchmarkConfig", + "description": "The configuration for the benchmark." } }, "additionalProperties": false, @@ -9444,7 +9480,8 @@ } ] } - } + }, + "description": "The rows to score." }, "scoring_functions": { "type": "object", @@ -9457,7 +9494,8 @@ "type": "null" } ] - } + }, + "description": "The scoring functions to use for the scoring." } }, "additionalProperties": false, @@ -9474,14 +9512,16 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/ScoringResult" - } + }, + "description": "A map of scoring function name to ScoringResult." } }, "additionalProperties": false, "required": [ "results" ], - "title": "ScoreResponse" + "title": "ScoreResponse", + "description": "The response from scoring." }, "ScoreBatchRequest": { "type": "object", @@ -9896,7 +9936,8 @@ "name": "Datasets" }, { - "name": "Eval" + "name": "Eval", + "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." }, { "name": "Files (Coming Soon)" diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index cfca894fb..bb994b0c5 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -31,25 +31,32 @@ paths: $ref: '#/components/responses/DefaultError' tags: - DatasetIO - description: '' + description: >- + Get a paginated list of rows from a dataset. parameters: - name: dataset_id in: query + description: >- + The ID of the dataset to get the rows from. required: true schema: type: string - name: rows_in_page in: query + description: The number of rows to get per page. required: true schema: type: integer - name: page_token in: query + description: The token to get the next page of rows. required: false schema: type: string - name: filter_condition in: query + description: >- + (Optional) A condition to filter the rows by. required: false schema: type: string @@ -613,7 +620,8 @@ paths: post: responses: '200': - description: OK + description: >- + EvaluateResponse object containing generations and scores content: application/json: schema: @@ -630,10 +638,12 @@ paths: $ref: '#/components/responses/DefaultError' tags: - Eval - description: '' + description: Evaluate a list of rows on a benchmark. 
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index cfca894fb..bb994b0c5 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -31,25 +31,32 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - DatasetIO
-      description: ''
+      description: >-
+        Get a paginated list of rows from a dataset.
      parameters:
        - name: dataset_id
          in: query
+          description: >-
+            The ID of the dataset to get the rows from.
          required: true
          schema:
            type: string
        - name: rows_in_page
          in: query
+          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
+          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
+          description: >-
+            (Optional) A condition to filter the rows by.
          required: false
          schema:
            type: string
@@ -613,7 +620,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            EvaluateResponse object containing generations and scores
          content:
            application/json:
              schema:
@@ -630,10 +638,12 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Evaluate a list of rows on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@@ -1417,7 +1427,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The status of the evaluation job.
          content:
            application/json:
              schema:
@@ -1436,15 +1446,18 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the status of a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to get the status of.
          required: true
          schema:
            type: string
@@ -1464,15 +1477,18 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Cancel a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to cancel.
          required: true
          schema:
            type: string
@@ -1480,7 +1496,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The result of the job.
          content:
            application/json:
              schema:
@@ -1497,15 +1513,18 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the result of a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to get the result of.
          required: true
          schema:
            type: string
@@ -2218,7 +2237,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            The job that was created to run the evaluation.
          content:
            application/json:
              schema:
@@ -2235,10 +2255,12 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Run an evaluation on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@@ -2306,7 +2328,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            ScoreResponse object containing rows and aggregated results
          content:
            application/json:
              schema:
@@ -2323,7 +2346,7 @@ paths:
            $ref: '#/components/responses/DefaultError'
      tags:
        - Scoring
-      description: ''
+      description: Score a list of rows.
      parameters: []
      requestBody:
        content:
@@ -4290,11 +4313,14 @@ components:
          default: agent
        config:
          $ref: '#/components/schemas/AgentConfig'
+          description: >-
+            The configuration for the agent candidate.
      additionalProperties: false
      required:
        - type
        - config
      title: AgentCandidate
+      description: An agent candidate for evaluation.
    AggregationFunctionType:
      type: string
      enum:
@@ -4323,17 +4349,26 @@ components:
      properties:
        eval_candidate:
          $ref: '#/components/schemas/EvalCandidate'
+          description: The candidate to evaluate.
        scoring_params:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringFnParams'
+          description: >-
+            Map between scoring function id and parameters for each scoring function
+            you want to run
        num_examples:
          type: integer
+          description: >-
+            (Optional) The number of examples to evaluate. If not provided, all examples
+            in the dataset will be evaluated
      additionalProperties: false
      required:
        - eval_candidate
        - scoring_params
      title: BenchmarkConfig
+      description: >-
+        A benchmark configuration for evaluation.
    EvalCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
        - $ref: '#/components/schemas/AgentCandidate'
      discriminator:
        propertyName: type
        mapping:
          model: '#/components/schemas/ModelCandidate'
          agent: '#/components/schemas/AgentCandidate'
@@ -4376,16 +4411,22 @@ components:
          default: model
        model:
          type: string
+          description: The model ID to evaluate.
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
+          description: The sampling parameters for the model.
        system_message:
          $ref: '#/components/schemas/SystemMessage'
+          description: >-
+            (Optional) The system message providing instructions or context to the
+            model.
      additionalProperties: false
      required:
        - type
        - model
        - sampling_params
      title: ModelCandidate
+      description: A model candidate for evaluation.
    RegexParserScoringFnParams:
      type: object
      properties:
@@ -4431,12 +4472,16 @@ components:
            - type: string
            - type: array
            - type: object
+          description: The rows to evaluate.
        scoring_functions:
          type: array
          items:
            type: string
+          description: >-
+            The scoring functions to use for the evaluation.
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
+          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - input_rows
@@ -4458,15 +4503,18 @@ components:
            - type: string
            - type: array
            - type: object
+          description: The generations from the evaluation.
        scores:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
+          description: The scores from the evaluation.
      additionalProperties: false
      required:
        - generations
        - scores
      title: EvaluateResponse
+      description: The response from an evaluation.
    ScoringResult:
      type: object
      properties:
@@ -4482,6 +4530,8 @@ components:
            - type: string
            - type: array
            - type: object
+          description: >-
+            The scoring result for each row. Each row is a map of column name to value.
        aggregated_results:
          type: object
          additionalProperties:
@@ -4492,11 +4542,13 @@ components:
            - type: string
            - type: array
            - type: object
+          description: Map of metric name to aggregated value
      additionalProperties: false
      required:
        - score_rows
        - aggregated_results
      title: ScoringResult
+      description: A scoring result for a single row.
    Session:
      type: object
      properties:
@@ -4809,15 +4861,19 @@ components:
            - type: string
            - type: array
            - type: object
+          description: The rows in the current page.
        total_count:
          type: integer
+          description: The total number of rows in the dataset.
        next_page_token:
          type: string
+          description: The token to get the next page of rows.
      additionalProperties: false
      required:
        - rows
        - total_count
      title: PaginatedRowsResult
+      description: A paginated list of rows from a dataset.
    ScoringFn:
      type: object
      properties:
@@ -6248,6 +6304,7 @@ components:
      properties:
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
+          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - benchmark_config
@@ -6329,12 +6386,15 @@ components:
            - type: string
            - type: array
            - type: object
+          description: The rows to score.
        scoring_functions:
          type: object
          additionalProperties:
            oneOf:
              - $ref: '#/components/schemas/ScoringFnParams'
              - type: 'null'
+          description: >-
+            The scoring functions to use for the scoring.
      additionalProperties: false
      required:
        - input_rows
@@ -6347,10 +6407,13 @@ components:
      type: object
      additionalProperties:
        $ref: '#/components/schemas/ScoringResult'
+      description: >-
+        A map of scoring function name to ScoringResult.
      additionalProperties: false
      required:
        - results
      title: ScoreResponse
+      description: The response from scoring.
    ScoreBatchRequest:
      type: object
      properties:
@@ -6621,6 +6684,8 @@ tags:
 - name: DatasetIO
 - name: Datasets
 - name: Eval
+  x-displayName: >-
+    Llama Stack Evaluation API for running evaluations on model and agent candidates.
 - name: Files (Coming Soon)
 - name: Inference
   description: >-
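The YAML spec mirrors the JSON spec above. As a concrete illustration of the schemas both files document, a minimal request body for the evaluate-rows operation (`POST /eval/benchmarks/{benchmark_id}/evaluations`, per the `webmethod` route in `eval.py` below) could look like the sketch here; the model ID and scoring function ID are placeholders, not values mandated by the spec.

```python
# A minimal EvaluateRowsRequest-shaped body, following the schemas documented
# above. "basic::equality" and the model ID are placeholder identifiers.
evaluate_rows_request = {
    "input_rows": [{"input_query": "What is 2 + 2?", "expected_answer": "4"}],
    "scoring_functions": ["basic::equality"],
    "benchmark_config": {
        "eval_candidate": {
            "type": "model",  # discriminator selecting ModelCandidate
            "model": "meta-llama/Llama-3.1-8B-Instruct",
            "sampling_params": {},  # rely on SamplingParams defaults
        },
        # Map of scoring function id -> params; empty uses each function's defaults.
        "scoring_params": {},
        "num_examples": 1,  # optional; omit to evaluate all rows
    },
}
```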
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index d85d22876..6a04a6329 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -14,6 +14,14 @@ from llama_stack.schema_utils import json_schema_type, webmethod
 
 @json_schema_type
 class PaginatedRowsResult(BaseModel):
+    """
+    A paginated list of rows from a dataset.
+
+    :param rows: The rows in the current page.
+    :param total_count: The total number of rows in the dataset.
+    :param next_page_token: The token to get the next page of rows.
+    """
+
     # the rows obey the DatasetSchema for the given dataset
     rows: List[Dict[str, Any]]
     total_count: int
@@ -36,7 +44,15 @@ class DatasetIO(Protocol):
         rows_in_page: int,
         page_token: Optional[str] = None,
         filter_condition: Optional[str] = None,
-    ) -> PaginatedRowsResult: ...
+    ) -> PaginatedRowsResult:
+        """Get a paginated list of rows from a dataset.
+
+        :param dataset_id: The ID of the dataset to get the rows from.
+        :param rows_in_page: The number of rows to get per page.
+        :param page_token: The token to get the next page of rows.
+        :param filter_condition: (Optional) A condition to filter the rows by.
+        """
+        ...
 
     @webmethod(route="/datasetio/rows", method="POST")
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
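To make the pagination contract concrete, here is a minimal sketch of walking a whole dataset with `get_rows_paginated`. The `datasetio_impl` object is hypothetical and stands in for any provider implementing the `DatasetIO` protocol above.

```python
# Sketch: fetch every row of a dataset by following next_page_token.
# `datasetio_impl` is a placeholder for an implementation of DatasetIO.
import asyncio
from typing import Any, Dict, List


async def fetch_all_rows(datasetio_impl, dataset_id: str) -> List[Dict[str, Any]]:
    rows: List[Dict[str, Any]] = []
    page_token = None
    while True:
        result = await datasetio_impl.get_rows_paginated(
            dataset_id=dataset_id,
            rows_in_page=100,       # page size
            page_token=page_token,  # None requests the first page
        )
        rows.extend(result.rows)
        # Stop once the dataset is exhausted or no continuation token is returned.
        if not result.next_page_token or len(rows) >= result.total_count:
            return rows
        page_token = result.next_page_token


# e.g. asyncio.run(fetch_all_rows(my_datasetio_impl, "my-dataset"))
```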
+ """ + generations: List[Dict[str, Any]] # each key in the dict is a scoring function name scores: Dict[str, ScoringResult] class Eval(Protocol): + """Llama Stack Evaluation API for running evaluations on model and agent candidates.""" + @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST") async def run_eval( self, benchmark_id: str, benchmark_config: BenchmarkConfig, - ) -> Job: ... + ) -> Job: + """Run an evaluation on a benchmark. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param benchmark_config: The configuration for the benchmark. + :return: The job that was created to run the evaluation. + """ @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST") async def evaluate_rows( @@ -73,13 +106,40 @@ class Eval(Protocol): input_rows: List[Dict[str, Any]], scoring_functions: List[str], benchmark_config: BenchmarkConfig, - ) -> EvaluateResponse: ... + ) -> EvaluateResponse: + """Evaluate a list of rows on a benchmark. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param input_rows: The rows to evaluate. + :param scoring_functions: The scoring functions to use for the evaluation. + :param benchmark_config: The configuration for the benchmark. + :return: EvaluateResponse object containing generations and scores + """ @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") - async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: ... + async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: + """Get the status of a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to get the status of. + :return: The status of the evaluationjob. + """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE") - async def job_cancel(self, benchmark_id: str, job_id: str) -> None: ... + async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + """Cancel a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to cancel. + """ + ... @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") - async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ... + async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: + """Get the result of a job. + + :param benchmark_id: The ID of the benchmark to run the evaluation on. + :param job_id: The ID of the job to get the result of. + :return: The result of the job. + """ diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index 960149476..54a9ac2aa 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -17,6 +17,13 @@ ScoringResultRow = Dict[str, Any] @json_schema_type class ScoringResult(BaseModel): + """ + A scoring result for a single row. + + :param score_rows: The scoring result for each row. Each row is a map of column name to value. + :param aggregated_results: Map of metric name to aggregated value + """ + score_rows: List[ScoringResultRow] # aggregated metrics to value aggregated_results: Dict[str, Any] @@ -30,6 +37,12 @@ class ScoreBatchResponse(BaseModel): @json_schema_type class ScoreResponse(BaseModel): + """ + The response from scoring. + + :param results: A map of scoring function name to ScoringResult. 
+ """ + # each key in the dict is a scoring function name results: Dict[str, ScoringResult] @@ -55,4 +68,11 @@ class Scoring(Protocol): self, input_rows: List[Dict[str, Any]], scoring_functions: Dict[str, Optional[ScoringFnParams]], - ) -> ScoreResponse: ... + ) -> ScoreResponse: + """Score a list of rows. + + :param input_rows: The rows to score. + :param scoring_functions: The scoring functions to use for the scoring. + :return: ScoreResponse object containing rows and aggregated results + """ + ...