diff --git a/api_update_plan.md b/api_update_plan.md
index ffda31b00..20a8938e1 100644
--- a/api_update_plan.md
+++ b/api_update_plan.md
@@ -231,9 +231,9 @@ Before finalizing documentation, verify:
 [x] 10. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasets/datasets.py` - Dataset management
 [x] 11. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/datasetio/datasetio.py` - Dataset I/O operations
 [x] 12. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/post_training/post_training.py` - Training and fine-tuning
-13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
-14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
-15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
+[x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
+[x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
+[x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index aa3fe644b..264b2e6b4 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -9301,7 +9301,8 @@
                     "categorical_count",
                     "accuracy"
                 ],
-                "title": "AggregationFunctionType"
+                "title": "AggregationFunctionType",
+                "description": "Types of aggregation functions for scoring results."
             },
             "BasicScoringFnParams": {
                 "type": "object",
@@ -9309,13 +9310,15 @@
                     "type": {
                         "$ref": "#/components/schemas/ScoringFnParamsType",
                         "const": "basic",
-                        "default": "basic"
+                        "default": "basic",
+                        "description": "The type of scoring function parameters, always basic"
                     },
                     "aggregation_functions": {
                         "type": "array",
                         "items": {
                             "$ref": "#/components/schemas/AggregationFunctionType"
-                        }
+                        },
+                        "description": "Aggregation functions to apply to the scores of each row"
                     }
                 },
                 "additionalProperties": false,
@@ -9323,7 +9326,8 @@
                     "type",
                     "aggregation_functions"
                 ],
-                "title": "BasicScoringFnParams"
+                "title": "BasicScoringFnParams",
+                "description": "Parameters for basic scoring function configuration."
             },
             "BenchmarkConfig": {
                 "type": "object",
@@ -9375,25 +9379,30 @@
                     "type": {
                         "$ref": "#/components/schemas/ScoringFnParamsType",
                         "const": "llm_as_judge",
-                        "default": "llm_as_judge"
+                        "default": "llm_as_judge",
+                        "description": "The type of scoring function parameters, always llm_as_judge"
                     },
                     "judge_model": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Identifier of the LLM model to use as a judge for scoring"
                     },
                     "prompt_template": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Custom prompt template for the judge model"
                     },
                     "judge_score_regexes": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "Regexes to extract the answer from generated response"
                     },
                     "aggregation_functions": {
                         "type": "array",
                         "items": {
                             "$ref": "#/components/schemas/AggregationFunctionType"
-                        }
+                        },
+                        "description": "Aggregation functions to apply to the scores of each row"
                     }
                 },
                 "additionalProperties": false,
@@ -9403,7 +9412,8 @@
                     "judge_score_regexes",
                     "aggregation_functions"
                 ],
-                "title": "LLMAsJudgeScoringFnParams"
+                "title": "LLMAsJudgeScoringFnParams",
+                "description": "Parameters for LLM-as-judge scoring function configuration."
             },
             "ModelCandidate": {
                 "type": "object",
@@ -9441,19 +9451,22 @@
                     "type": {
                         "$ref": "#/components/schemas/ScoringFnParamsType",
                         "const": "regex_parser",
-                        "default": "regex_parser"
+                        "default": "regex_parser",
+                        "description": "The type of scoring function parameters, always regex_parser"
                     },
                     "parsing_regexes": {
                         "type": "array",
                         "items": {
                             "type": "string"
-                        }
+                        },
+                        "description": "Regex to extract the answer from generated response"
                     },
                     "aggregation_functions": {
                         "type": "array",
                         "items": {
                             "$ref": "#/components/schemas/AggregationFunctionType"
-                        }
+                        },
+                        "description": "Aggregation functions to apply to the scores of each row"
                     }
                 },
                 "additionalProperties": false,
@@ -9462,7 +9475,8 @@
                     "parsing_regexes",
                     "aggregation_functions"
                 ],
-                "title": "RegexParserScoringFnParams"
+                "title": "RegexParserScoringFnParams",
+                "description": "Parameters for regex parser scoring function configuration."
             },
             "ScoringFnParams": {
                 "oneOf": [
@@ -9492,7 +9506,8 @@
                     "regex_parser",
                     "basic"
                 ],
-                "title": "ScoringFnParamsType"
+                "title": "ScoringFnParamsType",
+                "description": "Types of scoring function parameter configurations."
             },
             "EvaluateRowsRequest": {
                 "type": "object",
@@ -10765,9 +10780,9 @@
                             "tool",
                             "tool_group"
                         ],
-                        "title": "ResourceType",
                         "const": "scoring_function",
-                        "default": "scoring_function"
+                        "default": "scoring_function",
+                        "description": "The resource type, always scoring_function"
                     },
                     "description": {
                         "type": "string"
@@ -10812,7 +10827,8 @@
                     "metadata",
                     "return_type"
                 ],
-                "title": "ScoringFn"
+                "title": "ScoringFn",
+                "description": "A scoring function resource for evaluating model outputs."
             },
             "StringType": {
                 "type": "object",
@@ -16105,20 +16121,23 @@
                 "type": "object",
                 "properties": {
                     "dataset_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) The identifier of the dataset that was scored"
                     },
                     "results": {
                         "type": "object",
                         "additionalProperties": {
                             "$ref": "#/components/schemas/ScoringResult"
-                        }
+                        },
+                        "description": "A map of scoring function name to ScoringResult"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
                     "results"
                 ],
-                "title": "ScoreBatchResponse"
+                "title": "ScoreBatchResponse",
+                "description": "Response from batch scoring operations on datasets."
             },
             "AlgorithmConfig": {
                 "oneOf": [
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index ce7a7293f..d24276596 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -6681,6 +6681,8 @@ components:
         - categorical_count
         - accuracy
       title: AggregationFunctionType
+      description: >-
+        Types of aggregation functions for scoring results.
     BasicScoringFnParams:
       type: object
       properties:
@@ -6688,15 +6690,21 @@ components:
           $ref: '#/components/schemas/ScoringFnParamsType'
           const: basic
           default: basic
+          description: >-
+            The type of scoring function parameters, always basic
         aggregation_functions:
           type: array
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
+          description: >-
+            Aggregation functions to apply to the scores of each row
       additionalProperties: false
       required:
         - type
         - aggregation_functions
       title: BasicScoringFnParams
+      description: >-
+        Parameters for basic scoring function configuration.
     BenchmarkConfig:
       type: object
       properties:
@@ -6738,18 +6746,28 @@ components:
           $ref: '#/components/schemas/ScoringFnParamsType'
           const: llm_as_judge
           default: llm_as_judge
+          description: >-
+            The type of scoring function parameters, always llm_as_judge
         judge_model:
           type: string
+          description: >-
+            Identifier of the LLM model to use as a judge for scoring
         prompt_template:
           type: string
+          description: >-
+            (Optional) Custom prompt template for the judge model
         judge_score_regexes:
           type: array
           items:
             type: string
+          description: >-
+            Regexes to extract the answer from generated response
         aggregation_functions:
           type: array
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
+          description: >-
+            Aggregation functions to apply to the scores of each row
       additionalProperties: false
       required:
         - type
@@ -6757,6 +6775,8 @@ components:
         - judge_score_regexes
         - aggregation_functions
       title: LLMAsJudgeScoringFnParams
+      description: >-
+        Parameters for LLM-as-judge scoring function configuration.
     ModelCandidate:
       type: object
       properties:
@@ -6789,20 +6809,28 @@ components:
           $ref: '#/components/schemas/ScoringFnParamsType'
           const: regex_parser
           default: regex_parser
+          description: >-
+            The type of scoring function parameters, always regex_parser
         parsing_regexes:
           type: array
           items:
             type: string
+          description: >-
+            Regex to extract the answer from generated response
         aggregation_functions:
           type: array
           items:
             $ref: '#/components/schemas/AggregationFunctionType'
+          description: >-
+            Aggregation functions to apply to the scores of each row
       additionalProperties: false
       required:
         - type
         - parsing_regexes
         - aggregation_functions
       title: RegexParserScoringFnParams
+      description: >-
+        Parameters for regex parser scoring function configuration.
     ScoringFnParams:
       oneOf:
         - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
@@ -6821,6 +6849,8 @@ components:
         - regex_parser
         - basic
       title: ScoringFnParamsType
+      description: >-
+        Types of scoring function parameter configurations.
     EvaluateRowsRequest:
       type: object
       properties:
@@ -7742,9 +7772,10 @@ components:
             - benchmark
             - tool
             - tool_group
-          title: ResourceType
           const: scoring_function
           default: scoring_function
+          description: >-
+            The resource type, always scoring_function
         description:
           type: string
         metadata:
@@ -7769,6 +7800,8 @@ components:
         - metadata
         - return_type
       title: ScoringFn
+      description: >-
+        A scoring function resource for evaluating model outputs.
     StringType:
       type: object
       properties:
@@ -11587,14 +11620,20 @@ components:
       properties:
         dataset_id:
           type: string
+          description: >-
+            (Optional) The identifier of the dataset that was scored
         results:
           type: object
           additionalProperties:
             $ref: '#/components/schemas/ScoringResult'
+          description: >-
+            A map of scoring function name to ScoringResult
       additionalProperties: false
       required:
         - results
       title: ScoreBatchResponse
+      description: >-
+        Response from batch scoring operations on datasets.
     AlgorithmConfig:
       oneOf:
         - $ref: '#/components/schemas/LoraFinetuningConfig'
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index 732e80e79..f4dc017a2 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -31,6 +31,11 @@ class ScoringResult(BaseModel):
 
 @json_schema_type
 class ScoreBatchResponse(BaseModel):
+    """Response from batch scoring operations on datasets.
+
+    :param dataset_id: (Optional) The identifier of the dataset that was scored
+    :param results: A map of scoring function name to ScoringResult
+    """
     dataset_id: str | None = None
     results: dict[str, ScoringResult]
 
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index 684041308..72bf1a42e 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -25,6 +25,12 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 # with standard metrics so they can be rolled up?
 @json_schema_type
 class ScoringFnParamsType(StrEnum):
+    """Types of scoring function parameter configurations.
+
+    :cvar llm_as_judge: Use an LLM model to evaluate and score responses
+    :cvar regex_parser: Use regex patterns to extract and score specific parts of responses
+    :cvar basic: Basic scoring with simple aggregation functions
+    """
     llm_as_judge = "llm_as_judge"
     regex_parser = "regex_parser"
     basic = "basic"
@@ -32,6 +38,14 @@ class ScoringFnParamsType(StrEnum):
 
 @json_schema_type
 class AggregationFunctionType(StrEnum):
+    """Types of aggregation functions for scoring results.
+
+    :cvar average: Calculate the arithmetic mean of scores
+    :cvar weighted_average: Calculate a weighted average of scores
+    :cvar median: Calculate the median value of scores
+    :cvar categorical_count: Count occurrences of categorical values
+    :cvar accuracy: Calculate accuracy as the proportion of correct answers
+    """
     average = "average"
     weighted_average = "weighted_average"
     median = "median"
@@ -41,6 +55,14 @@ class AggregationFunctionType(StrEnum):
 
 @json_schema_type
 class LLMAsJudgeScoringFnParams(BaseModel):
+    """Parameters for LLM-as-judge scoring function configuration.
+
+    :param type: The type of scoring function parameters, always llm_as_judge
+    :param judge_model: Identifier of the LLM model to use as a judge for scoring
+    :param prompt_template: (Optional) Custom prompt template for the judge model
+    :param judge_score_regexes: Regexes to extract the answer from generated response
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
     type: Literal[ScoringFnParamsType.llm_as_judge] = ScoringFnParamsType.llm_as_judge
     judge_model: str
     prompt_template: str | None = None
@@ -56,6 +78,12 @@ class LLMAsJudgeScoringFnParams(BaseModel):
 
 @json_schema_type
 class RegexParserScoringFnParams(BaseModel):
+    """Parameters for regex parser scoring function configuration.
+
+    :param type: The type of scoring function parameters, always regex_parser
+    :param parsing_regexes: Regex to extract the answer from generated response
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
     type: Literal[ScoringFnParamsType.regex_parser] = ScoringFnParamsType.regex_parser
     parsing_regexes: list[str] = Field(
         description="Regex to extract the answer from generated response",
@@ -69,6 +97,11 @@ class RegexParserScoringFnParams(BaseModel):
 
 @json_schema_type
 class BasicScoringFnParams(BaseModel):
+    """Parameters for basic scoring function configuration.
+
+    :param type: The type of scoring function parameters, always basic
+    :param aggregation_functions: Aggregation functions to apply to the scores of each row
+    """
     type: Literal[ScoringFnParamsType.basic] = ScoringFnParamsType.basic
     aggregation_functions: list[AggregationFunctionType] = Field(
         description="Aggregation functions to apply to the scores of each row",
@@ -100,6 +133,10 @@ class CommonScoringFnFields(BaseModel):
 
 @json_schema_type
 class ScoringFn(CommonScoringFnFields, Resource):
+    """A scoring function resource for evaluating model outputs.
+
+    :param type: The resource type, always scoring_function
+    """
     type: Literal[ResourceType.scoring_function] = ResourceType.scoring_function
 
     @property