From bcdb6fcc1503fe2c40cf63da8f03a2956c193959 Mon Sep 17 00:00:00 2001
From: Sai Soundararaj
Date: Tue, 1 Jul 2025 17:03:45 -0700
Subject: [PATCH] docs: add descriptions to the Benchmark resource API

---
 api_update_plan.md                        |  2 +-
 docs/_static/llama-stack-spec.html        | 16 ++++++++++------
 docs/_static/llama-stack-spec.yaml        |  9 ++++++++-
 llama_stack/apis/benchmarks/benchmarks.py |  7 +++++++
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/api_update_plan.md b/api_update_plan.md
index 20a8938e1..ec93f1440 100644
--- a/api_update_plan.md
+++ b/api_update_plan.md
@@ -234,7 +234,7 @@ Before finalizing documentation, verify:
 [x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
 [x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 [x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
-16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
+[x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 264b2e6b4..c28135dd2 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -9797,18 +9797,20 @@
                         "tool",
                         "tool_group"
                     ],
-                    "title": "ResourceType",
                     "const": "benchmark",
-                    "default": "benchmark"
+                    "default": "benchmark",
+                    "description": "The resource type, always benchmark"
                 },
                 "dataset_id": {
-                    "type": "string"
+                    "type": "string",
+                    "description": "Identifier of the dataset to use for the benchmark evaluation"
                 },
                 "scoring_functions": {
                     "type": "array",
                     "items": {
                         "type": "string"
-                    }
+                    },
+                    "description": "List of scoring function identifiers to apply during evaluation"
                 },
                 "metadata": {
                     "type": "object",
@@ -9833,7 +9835,8 @@
                                 "type": "object"
                             }
                         ]
-                    }
+                    },
+                    "description": "Metadata for this evaluation task"
                 }
             },
             "additionalProperties": false,
@@ -9845,7 +9848,8 @@
                 "scoring_functions",
                 "metadata"
             ],
-            "title": "Benchmark"
+            "title": "Benchmark",
+            "description": "A benchmark resource for evaluating model performance."
         },
         "OpenAIAssistantMessageParam": {
             "type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index d24276596..995ff8601 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -7037,15 +7037,19 @@ components:
           - benchmark
           - tool
           - tool_group
-          title: ResourceType
           const: benchmark
           default: benchmark
+          description: The resource type, always benchmark
         dataset_id:
           type: string
+          description: >-
+            Identifier of the dataset to use for the benchmark evaluation
         scoring_functions:
           type: array
           items:
             type: string
+          description: >-
+            List of scoring function identifiers to apply during evaluation
         metadata:
           type: object
           additionalProperties:
@@ -7056,6 +7060,7 @@ components:
             - type: string
             - type: array
             - type: object
+          description: Metadata for this evaluation task
       additionalProperties: false
       required:
       - identifier
@@ -7065,6 +7070,8 @@ components:
       - scoring_functions
       - metadata
       title: Benchmark
+      description: >-
+        A benchmark resource for evaluating model performance.
     OpenAIAssistantMessageParam:
       type: object
       properties:
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py
index d80c767f8..ff36007c6 100644
--- a/llama_stack/apis/benchmarks/benchmarks.py
+++ b/llama_stack/apis/benchmarks/benchmarks.py
@@ -22,6 +22,13 @@ class CommonBenchmarkFields(BaseModel):
 
 @json_schema_type
 class Benchmark(CommonBenchmarkFields, Resource):
+    """A benchmark resource for evaluating model performance.
+
+    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
+    :param scoring_functions: List of scoring function identifiers to apply during evaluation
+    :param metadata: Metadata for this evaluation task
+    :param type: The resource type, always benchmark
+    """
     type: Literal[ResourceType.benchmark] = ResourceType.benchmark
 
     @property
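
As a quick sanity check on the documented shape, a minimal, self-contained Pydantic sketch follows. It mirrors only the fields described in this patch (dataset_id, scoring_functions, metadata, type); it is not the real llama_stack.apis.benchmarks.Benchmark class, which also inherits identifier and provider fields from Resource, and the example identifiers are hypothetical.

# Illustrative sketch only: mirrors the fields documented in this patch, not the
# actual llama_stack.apis.benchmarks.Benchmark class (which also inherits
# identifier/provider fields from Resource).
from typing import Any, Literal

from pydantic import BaseModel, Field


class BenchmarkSketch(BaseModel):
    """A benchmark resource for evaluating model performance."""

    # Identifier of the dataset to use for the benchmark evaluation
    dataset_id: str
    # List of scoring function identifiers to apply during evaluation
    scoring_functions: list[str]
    # Metadata for this evaluation task
    metadata: dict[str, Any] = Field(default_factory=dict)
    # The resource type, always benchmark
    type: Literal["benchmark"] = "benchmark"


if __name__ == "__main__":
    # Hypothetical identifiers, used only to exercise the model.
    bench = BenchmarkSketch(
        dataset_id="example-eval-dataset",
        scoring_functions=["basic::equality"],
    )
    # The real class documents these fields via docstring :param entries (see the
    # patch above); this sketch only demonstrates the documented field shapes.
    print(bench.model_dump_json(indent=2))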