diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index ea7a8f210..b93f6a380 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2699,8 +2699,7 @@
                             "auto",
                             "required"
                         ],
-                        "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
-                        "default": "auto"
+                        "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
                     },
                     "tool_prompt_format": {
                         "type": "string",
@@ -2815,6 +2814,11 @@
             "BenchmarkConfig": {
                 "type": "object",
                 "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "benchmark",
+                        "default": "benchmark"
+                    },
                     "eval_candidate": {
                         "$ref": "#/components/schemas/EvalCandidate"
                     },
@@ -2830,6 +2834,7 @@
                 },
                 "additionalProperties": false,
                 "required": [
+                    "type",
                     "eval_candidate",
                     "scoring_params"
                 ]
@@ -3498,17 +3503,7 @@
             "ScoringResult": {
                 "type": "object",
                 "properties": {
-                    "metrics": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/MetricEvent"
-                        }
-                    },
-                    "completion_message": {
-                        "$ref": "#/components/schemas/CompletionMessage",
-                        "description": "The complete response message"
-                    },
-                    "logprobs": {
+                    "score_rows": {
                         "type": "array",
                         "items": {
                             "type": "object",
@@ -3568,75 +3563,7 @@
                     "aggregated_results"
                 ]
             },
-            "MetricEvent": {
-                "type": "object",
-                "properties": {
-                    "trace_id": {
-                        "type": "string"
-                    },
-                    "span_id": {
-                        "type": "string"
-                    },
-                    "timestamp": {
-                        "type": "string",
-                        "format": "date-time"
-                    },
-                    "attributes": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "integer"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "null"
-                                }
-                            ]
-                        }
-                    },
-                    "type": {
-                        "type": "string",
-                        "const": "metric",
-                        "default": "metric"
-                    },
-                    "metric": {
-                        "type": "string"
-                    },
-                    "value": {
-                        "oneOf": [
-                            {
-                                "type": "integer"
-                            },
-                            {
-                                "type": "number"
-                            }
-                        ]
-                    },
-                    "unit": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "trace_id",
-                    "span_id",
-                    "timestamp",
-                    "type",
-                    "metric",
-                    "value",
-                    "unit"
-                ]
-            },
-            "TokenLogProbs": {
+            "Benchmark": {
                 "type": "object",
                 "properties": {
                     "identifier": {
@@ -4225,6 +4152,12 @@
             "ChatCompletionResponse": {
                 "type": "object",
                 "properties": {
+                    "metrics": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/MetricEvent"
+                        }
+                    },
                     "completion_message": {
                         "$ref": "#/components/schemas/CompletionMessage",
                         "description": "The complete response message"
@@ -4243,6 +4176,74 @@
                 ],
                 "description": "Response from a chat completion request."
             },
+            "MetricEvent": {
+                "type": "object",
+                "properties": {
+                    "trace_id": {
+                        "type": "string"
+                    },
+                    "span_id": {
+                        "type": "string"
+                    },
+                    "timestamp": {
+                        "type": "string",
+                        "format": "date-time"
+                    },
+                    "attributes": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "integer"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "null"
+                                }
+                            ]
+                        }
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "metric",
+                        "default": "metric"
+                    },
+                    "metric": {
+                        "type": "string"
+                    },
+                    "value": {
+                        "oneOf": [
+                            {
+                                "type": "integer"
+                            },
+                            {
+                                "type": "number"
+                            }
+                        ]
+                    },
+                    "unit": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "trace_id",
+                    "span_id",
+                    "timestamp",
+                    "type",
+                    "metric",
+                    "value",
+                    "unit"
+                ]
+            },
             "TokenLogProbs": {
                 "type": "object",
                 "properties": {
@@ -4470,6 +4471,12 @@
             "ChatCompletionResponseStreamChunk": {
                 "type": "object",
                 "properties": {
+                    "metrics": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/MetricEvent"
+                        }
+                    },
                     "event": {
                         "$ref": "#/components/schemas/ChatCompletionResponseEvent",
                         "description": "The event containing the new content"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 19c646bf9..b30025020 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1641,7 +1641,6 @@ components:
             Whether tool use is required or automatic. This is a hint to the model
             which may not be followed. It depends on the Instruction Following capabilities
             of the model.
-          default: auto
         tool_prompt_format:
           type: string
           enum:
@@ -1713,6 +1712,10 @@ components:
     BenchmarkConfig:
       type: object
       properties:
+        type:
+          type: string
+          const: benchmark
+          default: benchmark
         eval_candidate:
           $ref: '#/components/schemas/EvalCandidate'
         scoring_params:
@@ -1723,6 +1726,7 @@ components:
           type: integer
       additionalProperties: false
       required:
+        - type
         - eval_candidate
         - scoring_params
     EvalCandidate:
@@ -2960,185 +2964,6 @@ components:
         - delta
       description: >-
         A chunk of a streamed completion response.
-    AgentConfig:
-      type: object
-      properties:
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        input_shields:
-          type: array
-          items:
-            type: string
-        output_shields:
-          type: array
-          items:
-            type: string
-        toolgroups:
-          type: array
-          items:
-            $ref: '#/components/schemas/AgentTool'
-        client_tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolDef'
-        tool_choice:
-          type: string
-          enum:
-            - auto
-            - required
-          description: >-
-            Whether tool use is required or automatic. This is a hint to the model
-            which may not be followed. It depends on the Instruction Following capabilities
-            of the model.
-        tool_prompt_format:
-          type: string
-          enum:
-            - json
-            - function_tag
-            - python_list
-          description: >-
-            Prompt format for calling custom / zero shot tools.
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-        max_infer_iters:
-          type: integer
-          default: 10
-        model:
-          type: string
-        instructions:
-          type: string
-        enable_session_persistence:
-          type: boolean
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-      additionalProperties: false
-      required:
-        - model
-        - instructions
-        - enable_session_persistence
-    AgentTool:
-      oneOf:
-        - type: string
-        - type: object
-          properties:
-            name:
-              type: string
-            args:
-              type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-          additionalProperties: false
-          required:
-            - name
-            - args
-    ToolDef:
-      type: object
-      properties:
-        type:
-          type: string
-          const: text
-          default: text
-        text:
-          type: string
-      additionalProperties: false
-      required:
-        - type
-        - text
-    ToolCallDelta:
-      type: object
-      properties:
-        type:
-          type: string
-          const: tool_call
-          default: tool_call
-        tool_call:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/ToolCall'
-        parse_status:
-          type: string
-          enum:
-            - started
-            - in_progress
-            - failed
-            - succeeded
-      additionalProperties: false
-      required:
-        - type
-        - tool_call
-        - parse_status
-    CompletionRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
-        content:
-          $ref: '#/components/schemas/InterleavedContent'
-          description: The content to generate a completion for
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-          description: >-
-            (Optional) Parameters to control the sampling strategy
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-          description: >-
-            (Optional) Grammar specification for guided (structured) decoding
-        stream:
-          type: boolean
-          description: >-
-            (Optional) If True, generate an SSE event stream of the response. Defaults
-            to False.
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          description: >-
-            (Optional) If specified, log probabilities for each token position will
-            be returned.
-      additionalProperties: false
-      required:
-        - model_id
-        - content
-    CompletionResponseStreamChunk:
-      type: object
-      properties:
-        delta:
-          type: string
-          description: >-
-            New content generated since last chunk. This can be one or more tokens.
-        stop_reason:
-          type: string
-          enum:
-            - end_of_turn
-            - end_of_message
-            - out_of_tokens
-          description: >-
-            Optional reason why generation stopped, if complete
-        logprobs:
-          type: array
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          description: >-
-            Optional log probabilities for generated tokens
-      additionalProperties: false
-      required:
-        - delta
-      description: >-
-        A chunk of a streamed completion response.
     CreateAgentRequest:
       type: object
       properties: