docs: api documentation for agents/eval/scoring/datasets (#1400)

# What does this PR do?
- add some docs to OpenAPI for agents/eval/scoring/datasetio

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
- read

[//]: # (## Documentation)
2025-03-05 09:40:24 -08:00 · 2025-03-05 09:40:24 -08:00 · 3d9331840e
commit 3d9331840e
parent 0d18274d34
6 changed files with 586 additions and 137 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -69,11 +69,12 @@
                "tags": [
                    "DatasetIO"
                ],
-                "description": "",
+                "description": "Get a paginated list of rows from a dataset.",
                "parameters": [
                    {
                        "name": "dataset_id",
                        "in": "query",
                        "description": "The ID of the dataset to get the rows from.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -82,6 +83,7 @@
                    {
                        "name": "rows_in_page",
                        "in": "query",
                        "description": "The number of rows to get per page.",
                        "required": true,
                        "schema": {
                            "type": "integer"
@ -90,6 +92,7 @@
                    {
                        "name": "page_token",
                        "in": "query",
                        "description": "The token to get the next page of rows.",
                        "required": false,
                        "schema": {
                            "type": "string"
@ -98,6 +101,7 @@
                    {
                        "name": "filter_condition",
                        "in": "query",
                        "description": "(Optional) A condition to filter the rows by.",
                        "required": false,
                        "schema": {
                            "type": "string"
@ -362,7 +366,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "An AgentCreateResponse with the agent ID.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -387,7 +391,7 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Create an agent with the given configuration.",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -405,7 +409,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "An AgentSessionCreateResponse.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -430,11 +434,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Create a new session for an agent.",
                "parameters": [
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to create the session for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -457,7 +462,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
+                        "description": "If stream=False, returns a Turn object. If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk objects.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -487,11 +492,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Create a new turn for an agent.",
                "parameters": [
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to create the turn for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -500,6 +506,7 @@
                    {
                        "name": "session_id",
                        "in": "path",
                        "description": "The ID of the session to create the turn for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -623,11 +630,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Delete an agent by its ID.",
                "parameters": [
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to delete.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -665,11 +673,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Retrieve an agent session by its ID.",
                "parameters": [
                    {
                        "name": "session_id",
                        "in": "path",
                        "description": "The ID of the session to get.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -678,6 +687,7 @@
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to get the session for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -686,6 +696,7 @@
                    {
                        "name": "turn_ids",
                        "in": "query",
                        "description": "(Optional) List of turn IDs to filter the session by.",
                        "required": false,
                        "schema": {
                            "type": "array",
@ -717,11 +728,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Delete an agent session by its ID.",
                "parameters": [
                    {
                        "name": "session_id",
                        "in": "path",
                        "description": "The ID of the session to delete.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -730,6 +742,7 @@
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to delete the session for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -887,7 +900,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "EvaluateResponse object containing generations and scores",
                        "content": {
                            "application/json": {
                                "schema": {
@ -912,11 +925,12 @@
                "tags": [
                    "Eval"
                ],
-                "description": "",
+                "description": "Evaluate a list of rows on a benchmark.",
                "parameters": [
                    {
                        "name": "benchmark_id",
                        "in": "path",
                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -939,7 +953,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "An AgentStepResponse.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -964,11 +978,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Retrieve an agent step by its ID.",
                "parameters": [
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to get the step for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -977,6 +992,7 @@
                    {
                        "name": "session_id",
                        "in": "path",
                        "description": "The ID of the session to get the step for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -985,6 +1001,7 @@
                    {
                        "name": "turn_id",
                        "in": "path",
                        "description": "The ID of the turn to get the step for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -993,6 +1010,7 @@
                    {
                        "name": "step_id",
                        "in": "path",
                        "description": "The ID of the step to get.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -1005,7 +1023,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "A Turn.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -1030,11 +1048,12 @@
                "tags": [
                    "Agents"
                ],
-                "description": "",
+                "description": "Retrieve an agent turn by its ID.",
                "parameters": [
                    {
                        "name": "agent_id",
                        "in": "path",
                        "description": "The ID of the agent to get the turn for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -1043,6 +1062,7 @@
                    {
                        "name": "session_id",
                        "in": "path",
                        "description": "The ID of the session to get the turn for.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -1051,6 +1071,7 @@
                    {
                        "name": "turn_id",
                        "in": "path",
                        "description": "The ID of the turn to get.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2105,7 +2126,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The status of the evaluation job.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -2137,11 +2158,12 @@
                "tags": [
                    "Eval"
                ],
-                "description": "",
+                "description": "Get the status of a job.",
                "parameters": [
                    {
                        "name": "benchmark_id",
                        "in": "path",
                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2150,6 +2172,7 @@
                    {
                        "name": "job_id",
                        "in": "path",
                        "description": "The ID of the job to get the status of.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2178,11 +2201,12 @@
                "tags": [
                    "Eval"
                ],
-                "description": "",
+                "description": "Cancel a job.",
                "parameters": [
                    {
                        "name": "benchmark_id",
                        "in": "path",
                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2191,6 +2215,7 @@
                    {
                        "name": "job_id",
                        "in": "path",
                        "description": "The ID of the job to cancel.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2203,7 +2228,7 @@
            "get": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The result of the job.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -2228,11 +2253,12 @@
                "tags": [
                    "Eval"
                ],
-                "description": "",
+                "description": "Get the result of a job.",
                "parameters": [
                    {
                        "name": "benchmark_id",
                        "in": "path",
                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -2241,6 +2267,7 @@
                    {
                        "name": "job_id",
                        "in": "path",
                        "description": "The ID of the job to get the result of.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -3271,7 +3298,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "The job that was created to run the evaluation.",
                        "content": {
                            "application/json": {
                                "schema": {
@ -3296,11 +3323,12 @@
                "tags": [
                    "Eval"
                ],
-                "description": "",
+                "description": "Run an evaluation on a benchmark.",
                "parameters": [
                    {
                        "name": "benchmark_id",
                        "in": "path",
                        "description": "The ID of the benchmark to run the evaluation on.",
                        "required": true,
                        "schema": {
                            "type": "string"
@ -3402,7 +3430,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "ScoreResponse object containing rows and aggregated results",
                        "content": {
                            "application/json": {
                                "schema": {
@ -3427,7 +3455,7 @@
                "tags": [
                    "Scoring"
                ],
-                "description": "",
+                "description": "Score a list of rows.",
                "parameters": [],
                "requestBody": {
                    "content": {
@ -5192,7 +5220,8 @@
                "type": "object",
                "properties": {
                    "agent_config": {
-                        "$ref": "#/components/schemas/AgentConfig"
+                        "$ref": "#/components/schemas/AgentConfig",
                        "description": "The configuration for the agent."
                    }
                },
                "additionalProperties": false,
@ -5218,7 +5247,8 @@
                "type": "object",
                "properties": {
                    "session_name": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The name of the session to create."
                    }
                },
                "additionalProperties": false,
@ -5254,10 +5284,12 @@
                                    "$ref": "#/components/schemas/ToolResponseMessage"
                                }
                            ]
-                        }
+                        },
                        "description": "List of messages to start the turn with."
                    },
                    "stream": {
-                        "type": "boolean"
+                        "type": "boolean",
                        "description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
                    },
                    "documents": {
                        "type": "array",
@ -5281,10 +5313,12 @@
                                        {
                                            "$ref": "#/components/schemas/URL"
                                        }
-                                    ]
+                                    ],
                                    "description": "The content of the document."
                                },
                                "mime_type": {
-                                    "type": "string"
+                                    "type": "string",
                                    "description": "The MIME type of the document."
                                }
                            },
                            "additionalProperties": false,
@ -5292,17 +5326,21 @@
                                "content",
                                "mime_type"
                            ],
-                            "title": "Document"
+                            "title": "Document",
-                        }
+                            "description": "A document to be used by an agent."
                        },
                        "description": "(Optional) List of documents to create the turn with."
                    },
                    "toolgroups": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/AgentTool"
-                        }
+                        },
                        "description": "(Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request."
                    },
                    "tool_config": {
-                        "$ref": "#/components/schemas/ToolConfig"
+                        "$ref": "#/components/schemas/ToolConfig",
                        "description": "(Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config."
                    }
                },
                "additionalProperties": false,
@ -5315,18 +5353,22 @@
                "type": "object",
                "properties": {
                    "turn_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the turn."
                    },
                    "step_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the step."
                    },
                    "started_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step started."
                    },
                    "completed_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step completed."
                    },
                    "step_type": {
                        "type": "string",
@ -5334,7 +5376,8 @@
                        "default": "inference"
                    },
                    "model_response": {
-                        "$ref": "#/components/schemas/CompletionMessage"
+                        "$ref": "#/components/schemas/CompletionMessage",
                        "description": "The response from the LLM."
                    }
                },
                "additionalProperties": false,
@ -5344,24 +5387,29 @@
                    "step_type",
                    "model_response"
                ],
-                "title": "InferenceStep"
+                "title": "InferenceStep",
                "description": "An inference step in an agent turn."
            },
            "MemoryRetrievalStep": {
                "type": "object",
                "properties": {
                    "turn_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the turn."
                    },
                    "step_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the step."
                    },
                    "started_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step started."
                    },
                    "completed_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step completed."
                    },
                    "step_type": {
                        "type": "string",
@ -5369,10 +5417,12 @@
                        "default": "memory_retrieval"
                    },
                    "vector_db_ids": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The IDs of the vector databases to retrieve context from."
                    },
                    "inserted_context": {
-                        "$ref": "#/components/schemas/InterleavedContent"
+                        "$ref": "#/components/schemas/InterleavedContent",
                        "description": "The context retrieved from the vector databases."
                    }
                },
                "additionalProperties": false,
@ -5383,7 +5433,8 @@
                    "vector_db_ids",
                    "inserted_context"
                ],
-                "title": "MemoryRetrievalStep"
+                "title": "MemoryRetrievalStep",
                "description": "A memory retrieval step in an agent turn."
            },
            "SafetyViolation": {
                "type": "object",
@ -5431,18 +5482,22 @@
                "type": "object",
                "properties": {
                    "turn_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the turn."
                    },
                    "step_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the step."
                    },
                    "started_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step started."
                    },
                    "completed_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step completed."
                    },
                    "step_type": {
                        "type": "string",
@ -5450,7 +5505,8 @@
                        "default": "shield_call"
                    },
                    "violation": {
-                        "$ref": "#/components/schemas/SafetyViolation"
+                        "$ref": "#/components/schemas/SafetyViolation",
                        "description": "The violation from the shield call."
                    }
                },
                "additionalProperties": false,
@ -5459,24 +5515,29 @@
                    "step_id",
                    "step_type"
                ],
-                "title": "ShieldCallStep"
+                "title": "ShieldCallStep",
                "description": "A shield call step in an agent turn."
            },
            "ToolExecutionStep": {
                "type": "object",
                "properties": {
                    "turn_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the turn."
                    },
                    "step_id": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The ID of the step."
                    },
                    "started_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step started."
                    },
                    "completed_at": {
                        "type": "string",
-                        "format": "date-time"
+                        "format": "date-time",
                        "description": "The time the step completed."
                    },
                    "step_type": {
                        "type": "string",
@ -5487,13 +5548,15 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/ToolCall"
-                        }
+                        },
                        "description": "The tool calls to execute."
                    },
                    "tool_responses": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/ToolResponse"
-                        }
+                        },
                        "description": "The tool responses from the tool calls."
                    }
                },
                "additionalProperties": false,
@ -5504,7 +5567,8 @@
                    "tool_calls",
                    "tool_responses"
                ],
-                "title": "ToolExecutionStep"
+                "title": "ToolExecutionStep",
                "description": "A tool execution step in an agent turn."
            },
            "ToolResponse": {
                "type": "object",
@ -5641,10 +5705,12 @@
                                        {
                                            "$ref": "#/components/schemas/URL"
                                        }
-                                    ]
+                                    ],
                                    "description": "The content of the attachment."
                                },
                                "mime_type": {
-                                    "type": "string"
+                                    "type": "string",
                                    "description": "The MIME type of the attachment."
                                }
                            },
                            "additionalProperties": false,
@ -5652,7 +5718,8 @@
                                "content",
                                "mime_type"
                            ],
-                            "title": "Attachment"
+                            "title": "Attachment",
                            "description": "An attachment to an agent turn."
                        }
                    },
                    "started_at": {
@ -5747,7 +5814,8 @@
                            "shield_call",
                            "memory_retrieval"
                        ],
-                        "title": "StepType"
+                        "title": "StepType",
                        "description": "Type of the step in an agent turn."
                    },
                    "step_id": {
                        "type": "string"
@ -5803,7 +5871,8 @@
                            "shield_call",
                            "memory_retrieval"
                        ],
-                        "title": "StepType"
+                        "title": "StepType",
                        "description": "Type of the step in an agent turn."
                    },
                    "step_id": {
                        "type": "string"
@ -5837,7 +5906,8 @@
                            "shield_call",
                            "memory_retrieval"
                        ],
-                        "title": "StepType"
+                        "title": "StepType",
                        "description": "Type of the step in an agent turn."
                    },
                    "step_id": {
                        "type": "string"
@ -6129,7 +6199,8 @@
                        "default": "agent"
                    },
                    "config": {
-                        "$ref": "#/components/schemas/AgentConfig"
+                        "$ref": "#/components/schemas/AgentConfig",
                        "description": "The configuration for the agent candidate."
                    }
                },
                "additionalProperties": false,
@ -6137,7 +6208,8 @@
                    "type",
                    "config"
                ],
-                "title": "AgentCandidate"
+                "title": "AgentCandidate",
                "description": "An agent candidate for evaluation."
            },
            "AggregationFunctionType": {
                "type": "string",
@ -6174,16 +6246,19 @@
                "type": "object",
                "properties": {
                    "eval_candidate": {
-                        "$ref": "#/components/schemas/EvalCandidate"
+                        "$ref": "#/components/schemas/EvalCandidate",
                        "description": "The candidate to evaluate."
                    },
                    "scoring_params": {
                        "type": "object",
                        "additionalProperties": {
                            "$ref": "#/components/schemas/ScoringFnParams"
-                        }
+                        },
                        "description": "Map between scoring function id and parameters for each scoring function you want to run"
                    },
                    "num_examples": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
                    }
                },
                "additionalProperties": false,
@ -6191,7 +6266,8 @@
                    "eval_candidate",
                    "scoring_params"
                ],
-                "title": "BenchmarkConfig"
+                "title": "BenchmarkConfig",
                "description": "A benchmark configuration for evaluation."
            },
            "EvalCandidate": {
                "oneOf": [
@ -6253,13 +6329,16 @@
                        "default": "model"
                    },
                    "model": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The model ID to evaluate."
                    },
                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
+                        "$ref": "#/components/schemas/SamplingParams",
                        "description": "The sampling parameters for the model."
                    },
                    "system_message": {
-                        "$ref": "#/components/schemas/SystemMessage"
+                        "$ref": "#/components/schemas/SystemMessage",
                        "description": "(Optional) The system message providing instructions or context to the model."
                    }
                },
                "additionalProperties": false,
@ -6268,7 +6347,8 @@
                    "model",
                    "sampling_params"
                ],
-                "title": "ModelCandidate"
+                "title": "ModelCandidate",
                "description": "A model candidate for evaluation."
            },
            "RegexParserScoringFnParams": {
                "type": "object",
@ -6347,16 +6427,19 @@
                                    }
                                ]
                            }
-                        }
+                        },
                        "description": "The rows to evaluate."
                    },
                    "scoring_functions": {
                        "type": "array",
                        "items": {
                            "type": "string"
-                        }
+                        },
                        "description": "The scoring functions to use for the evaluation."
                    },
                    "benchmark_config": {
-                        "$ref": "#/components/schemas/BenchmarkConfig"
+                        "$ref": "#/components/schemas/BenchmarkConfig",
                        "description": "The configuration for the benchmark."
                    }
                },
                "additionalProperties": false,
@ -6396,13 +6479,15 @@
                                    }
                                ]
                            }
-                        }
+                        },
                        "description": "The generations from the evaluation."
                    },
                    "scores": {
                        "type": "object",
                        "additionalProperties": {
                            "$ref": "#/components/schemas/ScoringResult"
-                        }
+                        },
                        "description": "The scores from the evaluation."
                    }
                },
                "additionalProperties": false,
@ -6410,7 +6495,8 @@
                    "generations",
                    "scores"
                ],
-                "title": "EvaluateResponse"
+                "title": "EvaluateResponse",
                "description": "The response from an evaluation."
            },
            "ScoringResult": {
                "type": "object",
@ -6441,7 +6527,8 @@
                                    }
                                ]
                            }
-                        }
+                        },
                        "description": "The scoring result for each row. Each row is a map of column name to value."
                    },
                    "aggregated_results": {
                        "type": "object",
@ -6466,7 +6553,8 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
                        "description": "Map of metric name to aggregated value"
                    }
                },
                "additionalProperties": false,
@ -6474,7 +6562,8 @@
                    "score_rows",
                    "aggregated_results"
                ],
-                "title": "ScoringResult"
+                "title": "ScoringResult",
                "description": "A scoring result for a single row."
            },
            "Session": {
                "type": "object",
@ -6963,13 +7052,16 @@
                                    }
                                ]
                            }
-                        }
+                        },
                        "description": "The rows in the current page."
                    },
                    "total_count": {
-                        "type": "integer"
+                        "type": "integer",
                        "description": "The total number of rows in the dataset."
                    },
                    "next_page_token": {
-                        "type": "string"
+                        "type": "string",
                        "description": "The token to get the next page of rows."
                    }
                },
                "additionalProperties": false,
@ -6977,7 +7069,8 @@
                    "rows",
                    "total_count"
                ],
-                "title": "PaginatedRowsResult"
+                "title": "PaginatedRowsResult",
                "description": "A paginated list of rows from a dataset."
            },
            "ScoringFn": {
                "type": "object",
@ -9249,7 +9342,8 @@
                "type": "object",
                "properties": {
                    "benchmark_config": {
-                        "$ref": "#/components/schemas/BenchmarkConfig"
+                        "$ref": "#/components/schemas/BenchmarkConfig",
                        "description": "The configuration for the benchmark."
                    }
                },
                "additionalProperties": false,
@ -9386,7 +9480,8 @@
                                    }
                                ]
                            }
-                        }
+                        },
                        "description": "The rows to score."
                    },
                    "scoring_functions": {
                        "type": "object",
@ -9399,7 +9494,8 @@
                                    "type": "null"
                                }
                            ]
-                        }
+                        },
                        "description": "The scoring functions to use for the scoring."
                    }
                },
                "additionalProperties": false,
@ -9416,14 +9512,16 @@
                        "type": "object",
                        "additionalProperties": {
                            "$ref": "#/components/schemas/ScoringResult"
-                        }
+                        },
                        "description": "A map of scoring function name to ScoringResult."
                    }
                },
                "additionalProperties": false,
                "required": [
                    "results"
                ],
-                "title": "ScoreResponse"
+                "title": "ScoreResponse",
                "description": "The response from scoring."
            },
            "ScoreBatchRequest": {
                "type": "object",
@ -9838,7 +9936,8 @@
            "name": "Datasets"
        },
        {
-            "name": "Eval"
+            "name": "Eval",
            "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
        },
        {
            "name": "Files (Coming Soon)"
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -31,25 +31,32 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - DatasetIO
-      description: ''
+      description: >-
        Get a paginated list of rows from a dataset.
      parameters:
        - name: dataset_id
          in: query
          description: >-
            The ID of the dataset to get the rows from.
          required: true
          schema:
            type: string
        - name: rows_in_page
          in: query
          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
          description: >-
            (Optional) A condition to filter the rows by.
          required: false
          schema:
            type: string
@ -234,7 +241,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
            An AgentCreateResponse with the agent ID.
          content:
            application/json:
              schema:
@ -251,7 +259,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: >-
        Create an agent with the given configuration.
      parameters: []
      requestBody:
        content:
@ -263,7 +272,7 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: An AgentSessionCreateResponse.
          content:
            application/json:
              schema:
@ -280,10 +289,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Create a new session for an agent.
      parameters:
        - name: agent_id
          in: path
          description: >-
            The ID of the agent to create the session for.
          required: true
          schema:
            type: string
@ -298,8 +309,8 @@ paths:
      responses:
        '200':
          description: >-
-            A single turn in an interaction with an Agentic System. **OR** streamed
+            If stream=False, returns a Turn object. If stream=True, returns an SSE
-            agent turn completion response.
+            event stream of AgentTurnResponseStreamChunk.
          content:
            application/json:
              schema:
@ -319,15 +330,19 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Create a new turn for an agent.
      parameters:
        - name: agent_id
          in: path
          description: >-
            The ID of the agent to create the turn for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
          description: >-
            The ID of the session to create the turn for.
          required: true
          schema:
            type: string
@ -411,10 +426,11 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Delete an agent by its ID.
      parameters:
        - name: agent_id
          in: path
          description: The ID of the agent to delete.
          required: true
          schema:
            type: string
@ -439,20 +455,25 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent session by its ID.
      parameters:
        - name: session_id
          in: path
          description: The ID of the session to get.
          required: true
          schema:
            type: string
        - name: agent_id
          in: path
          description: >-
            The ID of the agent to get the session for.
          required: true
          schema:
            type: string
        - name: turn_ids
          in: query
          description: >-
            (Optional) List of turn IDs to filter the session by.
          required: false
          schema:
            type: array
@ -474,15 +495,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Delete an agent session by its ID.
      parameters:
        - name: session_id
          in: path
          description: The ID of the session to delete.
          required: true
          schema:
            type: string
        - name: agent_id
          in: path
          description: >-
            The ID of the agent to delete the session for.
          required: true
          schema:
            type: string
@ -596,7 +620,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
            EvaluateResponse object containing generations and scores
          content:
            application/json:
              schema:
@ -613,10 +638,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Evaluate a list of rows on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
          description: >-
            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@ -630,7 +657,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: An AgentStepResponse.
          content:
            application/json:
              schema:
@ -647,25 +674,30 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent step by its ID.
      parameters:
        - name: agent_id
          in: path
          description: The ID of the agent to get the step for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
          description: >-
            The ID of the session to get the step for.
          required: true
          schema:
            type: string
        - name: turn_id
          in: path
          description: The ID of the turn to get the step for.
          required: true
          schema:
            type: string
        - name: step_id
          in: path
          description: The ID of the step to get.
          required: true
          schema:
            type: string
@ -673,7 +705,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: A Turn.
          content:
            application/json:
              schema:
@ -690,20 +722,24 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent turn by its ID.
      parameters:
        - name: agent_id
          in: path
          description: The ID of the agent to get the turn for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
          description: >-
            The ID of the session to get the turn for.
          required: true
          schema:
            type: string
        - name: turn_id
          in: path
          description: The ID of the turn to get.
          required: true
          schema:
            type: string
@ -1391,7 +1427,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The status of the evaluation job.
          content:
            application/json:
              schema:
@ -1410,15 +1446,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the status of a job.
      parameters:
        - name: benchmark_id
          in: path
          description: >-
            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
          description: The ID of the job to get the status of.
          required: true
          schema:
            type: string
@ -1438,15 +1477,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Cancel a job.
      parameters:
        - name: benchmark_id
          in: path
          description: >-
            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
          description: The ID of the job to cancel.
          required: true
          schema:
            type: string
@ -1454,7 +1496,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The result of the job.
          content:
            application/json:
              schema:
@ -1471,15 +1513,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the result of a job.
      parameters:
        - name: benchmark_id
          in: path
          description: >-
            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
          description: The ID of the job to get the result of.
          required: true
          schema:
            type: string
@ -2192,7 +2237,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
            The job that was created to run the evaluation.
          content:
            application/json:
              schema:
@ -2209,10 +2255,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Run an evaluation on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
          description: >-
            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@ -2280,7 +2328,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
            ScoreResponse object containing rows and aggregated results
          content:
            application/json:
              schema:
@ -2297,7 +2346,7 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Scoring
-      description: ''
+      description: Score a list of rows.
      parameters: []
      requestBody:
        content:
@ -3567,6 +3616,7 @@ components:
      properties:
        agent_config:
          $ref: '#/components/schemas/AgentConfig'
          description: The configuration for the agent.
      additionalProperties: false
      required:
        - agent_config
@ -3585,6 +3635,7 @@ components:
      properties:
        session_name:
          type: string
          description: The name of the session to create.
      additionalProperties: false
      required:
        - session_name
@ -3607,8 +3658,12 @@ components:
            oneOf:
              - $ref: '#/components/schemas/UserMessage'
              - $ref: '#/components/schemas/ToolResponseMessage'
          description: List of messages to start the turn with.
        stream:
          type: boolean
          description: >-
            (Optional) If True, generate an SSE event stream of the response. Defaults
            to False.
        documents:
          type: array
          items:
@ -3622,19 +3677,30 @@ components:
                    items:
                      $ref: '#/components/schemas/InterleavedContentItem'
                  - $ref: '#/components/schemas/URL'
                description: The content of the document.
              mime_type:
                type: string
                description: The MIME type of the document.
            additionalProperties: false
            required:
              - content
              - mime_type
            title: Document
            description: A document to be used by an agent.
          description: >-
            (Optional) List of documents to create the turn with.
        toolgroups:
          type: array
          items:
            $ref: '#/components/schemas/AgentTool'
          description: >-
            (Optional) List of toolgroups to create the turn with, will be used in
            addition to the agent's config toolgroups for the request.
        tool_config:
          $ref: '#/components/schemas/ToolConfig'
          description: >-
            (Optional) The tool configuration to create the turn with, will be used
            to override the agent's tool_config.
      additionalProperties: false
      required:
        - messages
@ -3644,20 +3710,25 @@ components:
      properties:
        turn_id:
          type: string
          description: The ID of the turn.
        step_id:
          type: string
          description: The ID of the step.
        started_at:
          type: string
          format: date-time
          description: The time the step started.
        completed_at:
          type: string
          format: date-time
          description: The time the step completed.
        step_type:
          type: string
          const: inference
          default: inference
        model_response:
          $ref: '#/components/schemas/CompletionMessage'
          description: The response from the LLM.
      additionalProperties: false
      required:
        - turn_id
@ -3665,27 +3736,36 @@ components:
        - step_type
        - model_response
      title: InferenceStep
      description: An inference step in an agent turn.
    MemoryRetrievalStep:
      type: object
      properties:
        turn_id:
          type: string
          description: The ID of the turn.
        step_id:
          type: string
          description: The ID of the step.
        started_at:
          type: string
          format: date-time
          description: The time the step started.
        completed_at:
          type: string
          format: date-time
          description: The time the step completed.
        step_type:
          type: string
          const: memory_retrieval
          default: memory_retrieval
        vector_db_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.
        inserted_context:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The context retrieved from the vector databases.
      additionalProperties: false
      required:
        - turn_id
@ -3694,6 +3774,8 @@ components:
        - vector_db_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
        A memory retrieval step in an agent turn.
    SafetyViolation:
      type: object
      properties:
@ -3721,39 +3803,49 @@ components:
      properties:
        turn_id:
          type: string
          description: The ID of the turn.
        step_id:
          type: string
          description: The ID of the step.
        started_at:
          type: string
          format: date-time
          description: The time the step started.
        completed_at:
          type: string
          format: date-time
          description: The time the step completed.
        step_type:
          type: string
          const: shield_call
          default: shield_call
        violation:
          $ref: '#/components/schemas/SafetyViolation'
          description: The violation from the shield call.
      additionalProperties: false
      required:
        - turn_id
        - step_id
        - step_type
      title: ShieldCallStep
      description: A shield call step in an agent turn.
    ToolExecutionStep:
      type: object
      properties:
        turn_id:
          type: string
          description: The ID of the turn.
        step_id:
          type: string
          description: The ID of the step.
        started_at:
          type: string
          format: date-time
          description: The time the step started.
        completed_at:
          type: string
          format: date-time
          description: The time the step completed.
        step_type:
          type: string
          const: tool_execution
@ -3762,10 +3854,12 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
          description: The tool calls to execute.
        tool_responses:
          type: array
          items:
            $ref: '#/components/schemas/ToolResponse'
          description: The tool responses from the tool calls.
      additionalProperties: false
      required:
        - turn_id
@ -3774,6 +3868,7 @@ components:
        - tool_calls
        - tool_responses
      title: ToolExecutionStep
      description: A tool execution step in an agent turn.
    ToolResponse:
      type: object
      properties:
@ -3850,13 +3945,16 @@ components:
                    items:
                      $ref: '#/components/schemas/InterleavedContentItem'
                  - $ref: '#/components/schemas/URL'
                description: The content of the attachment.
              mime_type:
                type: string
                description: The MIME type of the attachment.
            additionalProperties: false
            required:
              - content
              - mime_type
            title: Attachment
            description: An attachment to an agent turn.
        started_at:
          type: string
          format: date-time
@ -3922,6 +4020,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
          description: Type of the step in an agent turn.
        step_id:
          type: string
        step_details:
@ -3959,6 +4058,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
          description: Type of the step in an agent turn.
        step_id:
          type: string
        delta:
@ -3985,6 +4085,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
          description: Type of the step in an agent turn.
        step_id:
          type: string
        metadata:
@ -4212,11 +4313,14 @@ components:
          default: agent
        config:
          $ref: '#/components/schemas/AgentConfig'
          description: >-
            The configuration for the agent candidate.
      additionalProperties: false
      required:
        - type
        - config
      title: AgentCandidate
      description: An agent candidate for evaluation.
    AggregationFunctionType:
      type: string
      enum:
@ -4245,17 +4349,26 @@ components:
      properties:
        eval_candidate:
          $ref: '#/components/schemas/EvalCandidate'
          description: The candidate to evaluate.
        scoring_params:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringFnParams'
          description: >-
            Map between scoring function id and parameters for each scoring function
            you want to run
        num_examples:
          type: integer
          description: >-
            (Optional) The number of examples to evaluate. If not provided, all examples
            in the dataset will be evaluated
      additionalProperties: false
      required:
        - eval_candidate
        - scoring_params
      title: BenchmarkConfig
      description: >-
        A benchmark configuration for evaluation.
    EvalCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
@ -4298,16 +4411,22 @@ components:
          default: model
        model:
          type: string
          description: The model ID to evaluate.
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
          description: The sampling parameters for the model.
        system_message:
          $ref: '#/components/schemas/SystemMessage'
          description: >-
            (Optional) The system message providing instructions or context to the
            model.
      additionalProperties: false
      required:
        - type
        - model
        - sampling_params
      title: ModelCandidate
      description: A model candidate for evaluation.
    RegexParserScoringFnParams:
      type: object
      properties:
@ -4353,12 +4472,16 @@ components:
                - type: string
                - type: array
                - type: object
          description: The rows to evaluate.
        scoring_functions:
          type: array
          items:
            type: string
          description: >-
            The scoring functions to use for the evaluation.
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - input_rows
@ -4380,15 +4503,18 @@ components:
                - type: string
                - type: array
                - type: object
          description: The generations from the evaluation.
        scores:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
          description: The scores from the evaluation.
      additionalProperties: false
      required:
        - generations
        - scores
      title: EvaluateResponse
      description: The response from an evaluation.
    ScoringResult:
      type: object
      properties:
@ -4404,6 +4530,8 @@ components:
                - type: string
                - type: array
                - type: object
          description: >-
            The scoring result for each row. Each row is a map of column name to value.
        aggregated_results:
          type: object
          additionalProperties:
@ -4414,11 +4542,13 @@ components:
              - type: string
              - type: array
              - type: object
          description: Map of metric name to aggregated value
      additionalProperties: false
      required:
        - score_rows
        - aggregated_results
      title: ScoringResult
      description: A scoring result for a single row.
    Session:
      type: object
      properties:
@ -4731,15 +4861,19 @@ components:
                - type: string
                - type: array
                - type: object
          description: The rows in the current page.
        total_count:
          type: integer
          description: The total number of rows in the dataset.
        next_page_token:
          type: string
          description: The token to get the next page of rows.
      additionalProperties: false
      required:
        - rows
        - total_count
      title: PaginatedRowsResult
      description: A paginated list of rows from a dataset.
    ScoringFn:
      type: object
      properties:
@ -6170,6 +6304,7 @@ components:
      properties:
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - benchmark_config
@ -6251,12 +6386,15 @@ components:
                - type: string
                - type: array
                - type: object
          description: The rows to score.
        scoring_functions:
          type: object
          additionalProperties:
            oneOf:
              - $ref: '#/components/schemas/ScoringFnParams'
              - type: 'null'
          description: >-
            The scoring functions to use for the scoring.
      additionalProperties: false
      required:
        - input_rows
@ -6269,10 +6407,13 @@ components:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
          description: >-
            A map of scoring function name to ScoringResult.
      additionalProperties: false
      required:
        - results
      title: ScoreResponse
      description: The response from scoring.
    ScoreBatchRequest:
      type: object
      properties:
@ -6543,6 +6684,8 @@ tags:
  - name: DatasetIO
  - name: Datasets
  - name: Eval
    x-displayName: >-
      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: Files (Coming Soon)
  - name: Inference
    description: >-
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -41,16 +41,36 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
 class Attachment(BaseModel):
    """An attachment to an agent turn.
    :param content: The content of the attachment.
    :param mime_type: The MIME type of the attachment.
    """
    content: InterleavedContent | URL
    mime_type: str
 class Document(BaseModel):
    """A document to be used by an agent.
    :param content: The content of the document.
    :param mime_type: The MIME type of the document.
    """
    content: InterleavedContent | URL
    mime_type: str
 class StepCommon(BaseModel):
    """A common step in an agent turn.
    :param turn_id: The ID of the turn.
    :param step_id: The ID of the step.
    :param started_at: The time the step started.
    :param completed_at: The time the step completed.
    """
    turn_id: str
    step_id: str
    started_at: Optional[datetime] = None
@ -58,6 +78,14 @@ class StepCommon(BaseModel):
 class StepType(Enum):
    """Type of the step in an agent turn.
    :cvar inference: The step is an inference step that calls an LLM.
    :cvar tool_execution: The step is a tool execution step that executes a tool call.
    :cvar shield_call: The step is a shield call step that checks for safety violations.
    :cvar memory_retrieval: The step is a memory retrieval step that retrieves context from vector databases.
    """
    inference = "inference"
    tool_execution = "tool_execution"
    shield_call = "shield_call"
@ -66,6 +94,11 @@ class StepType(Enum):
@json_schema_type
 class InferenceStep(StepCommon):
    """An inference step in an agent turn.
    :param model_response: The response from the LLM.
    """
    model_config = ConfigDict(protected_namespaces=())
    step_type: Literal[StepType.inference.value] = StepType.inference.value
@ -74,6 +107,12 @@ class InferenceStep(StepCommon):
@json_schema_type
 class ToolExecutionStep(StepCommon):
    """A tool execution step in an agent turn.
    :param tool_calls: The tool calls to execute.
    :param tool_responses: The tool responses from the tool calls.
    """
    step_type: Literal[StepType.tool_execution.value] = StepType.tool_execution.value
    tool_calls: List[ToolCall]
    tool_responses: List[ToolResponse]
@ -81,13 +120,25 @@ class ToolExecutionStep(StepCommon):
@json_schema_type
 class ShieldCallStep(StepCommon):
    """A shield call step in an agent turn.
    :param violation: The violation from the shield call.
    """
    step_type: Literal[StepType.shield_call.value] = StepType.shield_call.value
    violation: Optional[SafetyViolation]
@json_schema_type
 class MemoryRetrievalStep(StepCommon):
    """A memory retrieval step in an agent turn.
    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
    :param inserted_context: The context retrieved from the vector databases.
    """
    step_type: Literal[StepType.memory_retrieval.value] = StepType.memory_retrieval.value
    # TODO: should this be List[str]?
    vector_db_ids: str
    inserted_context: InterleavedContent
@ -335,7 +386,13 @@ class Agents(Protocol):
    async def create_agent(
        self,
        agent_config: AgentConfig,
-    ) -> AgentCreateResponse: ...
+    ) -> AgentCreateResponse:
        """Create an agent with the given configuration.
        :param agent_config: The configuration for the agent.
        :returns: An AgentCreateResponse with the agent ID.
        """
        ...
    @webmethod(route="/agents/{agent_id}/session/{session_id}/turn", method="POST")
    async def create_agent_turn(
@ -352,7 +409,19 @@ class Agents(Protocol):
        documents: Optional[List[Document]] = None,
        toolgroups: Optional[List[AgentToolGroup]] = None,
        tool_config: Optional[ToolConfig] = None,
-    ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
+    ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]:
        """Create a new turn for an agent.
        :param agent_id: The ID of the agent to create the turn for.
        :param session_id: The ID of the session to create the turn for.
        :param messages: List of messages to start the turn with.
        :param stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False.
        :param documents: (Optional) List of documents to create the turn with.
        :param toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition to the agent's config toolgroups for the request.
        :param tool_config: (Optional) The tool configuration to create the turn with, will be used to override the agent's tool_config.
        :returns: If stream=False, returns a Turn object.
                  If stream=True, returns an SSE event stream of AgentTurnResponseStreamChunk
        """
    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
@ -388,7 +457,15 @@ class Agents(Protocol):
        agent_id: str,
        session_id: str,
        turn_id: str,
-    ) -> Turn: ...
+    ) -> Turn:
        """Retrieve an agent turn by its ID.
        :param agent_id: The ID of the agent to get the turn for.
        :param session_id: The ID of the session to get the turn for.
        :param turn_id: The ID of the turn to get.
        :returns: A Turn.
        """
        ...
    @webmethod(
        route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
@ -400,14 +477,30 @@ class Agents(Protocol):
        session_id: str,
        turn_id: str,
        step_id: str,
-    ) -> AgentStepResponse: ...
+    ) -> AgentStepResponse:
        """Retrieve an agent step by its ID.
        :param agent_id: The ID of the agent to get the step for.
        :param session_id: The ID of the session to get the step for.
        :param turn_id: The ID of the turn to get the step for.
        :param step_id: The ID of the step to get.
        :returns: An AgentStepResponse.
        """
        ...
    @webmethod(route="/agents/{agent_id}/session", method="POST")
    async def create_agent_session(
        self,
        agent_id: str,
        session_name: str,
-    ) -> AgentSessionCreateResponse: ...
+    ) -> AgentSessionCreateResponse:
        """Create a new session for an agent.
        :param agent_id: The ID of the agent to create the session for.
        :param session_name: The name of the session to create.
        :returns: An AgentSessionCreateResponse.
        """
        ...
    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
    async def get_agents_session(
@ -415,17 +508,35 @@ class Agents(Protocol):
        session_id: str,
        agent_id: str,
        turn_ids: Optional[List[str]] = None,
-    ) -> Session: ...
+    ) -> Session:
        """Retrieve an agent session by its ID.
        :param session_id: The ID of the session to get.
        :param agent_id: The ID of the agent to get the session for.
        :param turn_ids: (Optional) List of turn IDs to filter the session by.
        """
        ...
    @webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
    async def delete_agents_session(
        self,
        session_id: str,
        agent_id: str,
-    ) -> None: ...
+    ) -> None:
        """Delete an agent session by its ID.
        :param session_id: The ID of the session to delete.
        :param agent_id: The ID of the agent to delete the session for.
        """
        ...
    @webmethod(route="/agents/{agent_id}", method="DELETE")
    async def delete_agent(
        self,
        agent_id: str,
-    ) -> None: ...
+    ) -> None:
        """Delete an agent by its ID.
        :param agent_id: The ID of the agent to delete.
        """
        ...
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@ -14,6 +14,14 @@ from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
 class PaginatedRowsResult(BaseModel):
    """
    A paginated list of rows from a dataset.
    :param rows: The rows in the current page.
    :param total_count: The total number of rows in the dataset.
    :param next_page_token: The token to get the next page of rows.
    """
    # the rows obey the DatasetSchema for the given dataset
    rows: List[Dict[str, Any]]
    total_count: int
@ -36,7 +44,15 @@ class DatasetIO(Protocol):
        rows_in_page: int,
        page_token: Optional[str] = None,
        filter_condition: Optional[str] = None,
-    ) -> PaginatedRowsResult: ...
+    ) -> PaginatedRowsResult:
        """Get a paginated list of rows from a dataset.
        :param dataset_id: The ID of the dataset to get the rows from.
        :param rows_in_page: The number of rows to get per page.
        :param page_token: The token to get the next page of rows.
        :param filter_condition: (Optional) A condition to filter the rows by.
        """
        ...
    @webmethod(route="/datasetio/rows", method="POST")
    async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@ -19,6 +19,13 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho
@json_schema_type
 class ModelCandidate(BaseModel):
    """A model candidate for evaluation.
    :param model: The model ID to evaluate.
    :param sampling_params: The sampling parameters for the model.
    :param system_message: (Optional) The system message providing instructions or context to the model.
    """
    type: Literal["model"] = "model"
    model: str
    sampling_params: SamplingParams
@ -27,6 +34,11 @@ class ModelCandidate(BaseModel):
@json_schema_type
 class AgentCandidate(BaseModel):
    """An agent candidate for evaluation.
    :param config: The configuration for the agent candidate.
    """
    type: Literal["agent"] = "agent"
    config: AgentConfig
@ -39,6 +51,13 @@ EvalCandidate = register_schema(
@json_schema_type
 class BenchmarkConfig(BaseModel):
    """A benchmark configuration for evaluation.
    :param eval_candidate: The candidate to evaluate.
    :param scoring_params: Map between scoring function id and parameters for each scoring function you want to run
    :param num_examples: (Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated
    """
    eval_candidate: EvalCandidate
    scoring_params: Dict[str, ScoringFnParams] = Field(
        description="Map between scoring function id and parameters for each scoring function you want to run",
@ -53,18 +72,32 @@ class BenchmarkConfig(BaseModel):
@json_schema_type
 class EvaluateResponse(BaseModel):
    """The response from an evaluation.
    :param generations: The generations from the evaluation.
    :param scores: The scores from the evaluation.
    """
    generations: List[Dict[str, Any]]
    # each key in the dict is a scoring function name
    scores: Dict[str, ScoringResult]
 class Eval(Protocol):
    """Llama Stack Evaluation API for running evaluations on model and agent candidates."""
    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
    async def run_eval(
        self,
        benchmark_id: str,
        benchmark_config: BenchmarkConfig,
-    ) -> Job: ...
+    ) -> Job:
        """Run an evaluation on a benchmark.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
        :param benchmark_config: The configuration for the benchmark.
        :return: The job that was created to run the evaluation.
        """
    @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
    async def evaluate_rows(
@ -73,13 +106,40 @@ class Eval(Protocol):
        input_rows: List[Dict[str, Any]],
        scoring_functions: List[str],
        benchmark_config: BenchmarkConfig,
-    ) -> EvaluateResponse: ...
+    ) -> EvaluateResponse:
        """Evaluate a list of rows on a benchmark.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
        :param input_rows: The rows to evaluate.
        :param scoring_functions: The scoring functions to use for the evaluation.
        :param benchmark_config: The configuration for the benchmark.
        :return: EvaluateResponse object containing generations and scores
        """
    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
-    async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: ...
+    async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
        """Get the status of a job.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
        :param job_id: The ID of the job to get the status of.
        :return: The status of the evaluation job.
        """
        ...
    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None: ...
+    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
        """Cancel a job.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
        :param job_id: The ID of the job to cancel.
        """
        ...
    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: ...
+    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
        """Get the result of a job.
        :param benchmark_id: The ID of the benchmark to run the evaluation on.
        :param job_id: The ID of the job to get the result of.
        :return: The result of the job.
        """
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@ -17,6 +17,13 @@ ScoringResultRow = Dict[str, Any]
@json_schema_type
 class ScoringResult(BaseModel):
    """
    A scoring result for a single scoring function, with per-row scores and aggregated metrics.
    :param score_rows: The scoring result for each row. Each row is a map of column name to value.
    :param aggregated_results: Map of metric name to aggregated value
    """
    score_rows: List[ScoringResultRow]
    # aggregated metrics to value
    aggregated_results: Dict[str, Any]
@ -30,6 +37,12 @@ class ScoreBatchResponse(BaseModel):
@json_schema_type
 class ScoreResponse(BaseModel):
    """
    The response from scoring.
    :param results: A map of scoring function name to ScoringResult.
    """
    # each key in the dict is a scoring function name
    results: Dict[str, ScoringResult]
@ -55,4 +68,11 @@ class Scoring(Protocol):
        self,
        input_rows: List[Dict[str, Any]],
        scoring_functions: Dict[str, Optional[ScoringFnParams]],
-    ) -> ScoreResponse: ...
+    ) -> ScoreResponse:
        """Score a list of rows.
        :param input_rows: The rows to score.
        :param scoring_functions: The scoring functions to use for the scoring.
        :return: ScoreResponse object containing rows and aggregated results
        """
        ...