diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 363d968f9..b1b1504ee 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
     "info": {
         "title": "[DRAFT] Llama Stack Specification",
         "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-31 14:28:52.128905"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-11-07 22:26:27.169134"
     },
     "servers": [
         {
@@ -469,7 +469,7 @@
                 }
             }
         },
-        "/eval/evaluate": {
+        "/eval/evaluate_rows": {
             "post": {
                 "responses": {
                     "200": {
@@ -501,47 +501,7 @@
                     "content": {
                         "application/json": {
                             "schema": {
-                                "$ref": "#/components/schemas/EvaluateRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/eval/evaluate_batch": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/Job"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Eval"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/EvaluateBatchRequest"
+                                "$ref": "#/components/schemas/EvaluateRowsRequest"
                             }
                         }
                     },
@@ -1002,7 +962,7 @@
                 ],
                 "parameters": [
                     {
-                        "name": "shield_type",
+                        "name": "identifier",
                         "in": "query",
                         "required": true,
                         "schema": {
@@ -1317,6 +1277,14 @@
                     "Eval"
                 ],
                 "parameters": [
+                    {
+                        "name": "task_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
                     {
                         "name": "job_id",
                         "in": "query",
@@ -1362,6 +1330,14 @@
                     "Eval"
                 ],
                 "parameters": [
+                    {
+                        "name": "task_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
                     {
                         "name": "job_id",
                         "in": "query",
@@ -1892,6 +1868,46 @@
                 }
             }
         },
+        "/eval/run_eval": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Job"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RunEvalRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/safety/run_shield": {
             "post": {
                 "responses": {
@@ -4490,6 +4506,103 @@
                     "config"
                 ]
             },
+            "AppEvalTaskConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "app",
+                        "default": "app"
+                    },
+                    "eval_candidate": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/ModelCandidate"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AgentCandidate"
+                            }
+                        ]
+                    },
+                    "scoring_params": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/RegexParserScoringFnParams"
+                                }
+                            ]
+                        }
+                    },
+                    "num_examples": {
+                        "type": "integer"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "eval_candidate",
+                    "scoring_params"
+                ]
+            },
+            "BenchmarkEvalTaskConfig": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "benchmark",
+                        "default": "benchmark"
+                    },
+                    "eval_candidate": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/ModelCandidate"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AgentCandidate"
+                            }
+                        ]
+                    },
+                    "num_examples": {
+                        "type": "integer"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "eval_candidate"
+                ]
+            },
+            "LLMAsJudgeScoringFnParams": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "llm_as_judge",
+                        "default": "llm_as_judge"
+                    },
+                    "judge_model": {
+                        "type": "string"
+                    },
+                    "prompt_template": {
+                        "type": "string"
+                    },
+                    "judge_score_regexes": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "judge_model"
+                ]
+            },
             "ModelCandidate": {
                 "type": "object",
                 "properties": {
@@ -4515,9 +4628,32 @@
                     "sampling_params"
                 ]
             },
-            "EvaluateRequest": {
+            "RegexParserScoringFnParams": {
                 "type": "object",
                 "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "regex_parser",
+                        "default": "regex_parser"
+                    },
+                    "parsing_regexes": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type"
+                ]
+            },
+            "EvaluateRowsRequest": {
+                "type": "object",
+                "properties": {
+                    "task_id": {
+                        "type": "string"
+                    },
                     "input_rows": {
                         "type": "array",
                         "items": {
@@ -4546,28 +4682,29 @@
                             }
                         }
                     },
-                    "candidate": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/ModelCandidate"
-                            },
-                            {
-                                "$ref": "#/components/schemas/AgentCandidate"
-                            }
-                        ]
-                    },
                     "scoring_functions": {
                         "type": "array",
                         "items": {
                             "type": "string"
                         }
+                    },
+                    "task_config": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/BenchmarkEvalTaskConfig"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AppEvalTaskConfig"
+                            }
+                        ]
                     }
                 },
                 "additionalProperties": false,
                 "required": [
+                    "task_id",
                     "input_rows",
-                    "candidate",
-                    "scoring_functions"
+                    "scoring_functions",
+                    "task_config"
                 ]
             },
             "EvaluateResponse": {
@@ -4677,48 +4814,6 @@
                     "aggregated_results"
                 ]
             },
-            "EvaluateBatchRequest": {
-                "type": "object",
-                "properties": {
-                    "dataset_id": {
-                        "type": "string"
-                    },
-                    "candidate": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/ModelCandidate"
-                            },
-                            {
-                                "$ref": "#/components/schemas/AgentCandidate"
-                            }
-                        ]
-                    },
-                    "scoring_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "dataset_id",
-                    "candidate",
-                    "scoring_functions"
-                ]
-            },
-            "Job": {
-                "type": "object",
-                "properties": {
-                    "job_id": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "job_id"
-                ]
-            },
             "GetAgentsSessionRequest": {
                 "type": "object",
                 "properties": {
@@ -5085,6 +5180,11 @@
                             ]
                         }
                     },
+                    "type": {
+                        "type": "string",
+                        "const": "dataset",
+                        "default": "dataset"
+                    },
                     "provider_id": {
                         "type": "string"
                     }
@@ -5095,6 +5195,7 @@
                     "dataset_schema",
                     "url",
                     "metadata",
+                    "type",
                     "provider_id"
                 ]
             },
@@ -5132,6 +5233,11 @@
                             ]
                         }
                     },
+                    "type": {
+                        "type": "string",
+                        "const": "model",
+                        "default": "model"
+                    },
                     "provider_id": {
                         "type": "string"
                     }
@@ -5141,6 +5247,7 @@
                     "identifier",
                     "llama_model",
                     "metadata",
+                    "type",
                     "provider_id"
                 ]
             },
@@ -5188,166 +5295,6 @@
                     "total_count"
                 ]
             },
-            "Parameter": {
-                "type": "object",
-                "properties": {
-                    "name": {
-                        "type": "string"
-                    },
-                    "type": {
-                        "oneOf": [
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "string",
-                                        "default": "string"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "number",
-                                        "default": "number"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "boolean",
-                                        "default": "boolean"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "array",
-                                        "default": "array"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "object",
-                                        "default": "object"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "json",
-                                        "default": "json"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "union",
-                                        "default": "union"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "chat_completion_input",
-                                        "default": "chat_completion_input"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "completion_input",
-                                        "default": "completion_input"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            },
-                            {
-                                "type": "object",
-                                "properties": {
-                                    "type": {
-                                        "type": "string",
-                                        "const": "agent_turn_input",
-                                        "default": "agent_turn_input"
-                                    }
-                                },
-                                "additionalProperties": false,
-                                "required": [
-                                    "type"
-                                ]
-                            }
-                        ]
-                    },
-                    "description": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "name",
-                    "type"
-                ]
-            },
             "ScoringFnDefWithProvider": {
                 "type": "object",
                 "properties": {
@@ -5382,12 +5329,6 @@
                             ]
                         }
                     },
-                    "parameters": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/Parameter"
-                        }
-                    },
                     "return_type": {
                         "oneOf": [
                             {
@@ -5532,27 +5473,21 @@
                             }
                         ]
                     },
-                    "context": {
-                        "type": "object",
-                        "properties": {
-                            "judge_model": {
-                                "type": "string"
+                    "params": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
                             },
-                            "prompt_template": {
-                                "type": "string"
-                            },
-                            "judge_score_regex": {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
+                            {
+                                "$ref": "#/components/schemas/RegexParserScoringFnParams"
                             }
-                        },
-                        "additionalProperties": false,
-                        "required": [
-                            "judge_model"
                         ]
                     },
+                    "type": {
+                        "type": "string",
+                        "const": "scoring_fn",
+                        "default": "scoring_fn"
+                    },
                     "provider_id": {
                         "type": "string"
                     }
@@ -5561,8 +5496,8 @@
                 "required": [
                     "identifier",
                     "metadata",
-                    "parameters",
                     "return_type",
+                    "type",
                     "provider_id"
                 ]
             },
@@ -5572,7 +5507,7 @@
                     "identifier": {
                         "type": "string"
                     },
-                    "type": {
+                    "shield_type": {
                         "type": "string"
                     },
                     "params": {
@@ -5600,6 +5535,11 @@
                             ]
                         }
                     },
+                    "type": {
+                        "type": "string",
+                        "const": "shield",
+                        "default": "shield"
+                    },
                     "provider_id": {
                         "type": "string"
                     }
@@ -5607,8 +5547,9 @@
                 "additionalProperties": false,
                 "required": [
                     "identifier",
-                    "type",
+                    "shield_type",
                     "params",
+                    "type",
                     "provider_id"
                 ]
             },
@@ -5867,12 +5808,16 @@
             "JobCancelRequest": {
                 "type": "object",
                 "properties": {
+                    "task_id": {
+                        "type": "string"
+                    },
                     "job_id": {
                         "type": "string"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
+                    "task_id",
                     "job_id"
                 ]
             },
@@ -6575,10 +6520,45 @@
                     "shield"
                 ]
             },
+            "RunEvalRequest": {
+                "type": "object",
+                "properties": {
+                    "task_id": {
+                        "type": "string"
+                    },
+                    "task_config": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/BenchmarkEvalTaskConfig"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AppEvalTaskConfig"
+                            }
+                        ]
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "task_id",
+                    "task_config"
+                ]
+            },
+            "Job": {
+                "type": "object",
+                "properties": {
+                    "job_id": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "job_id"
+                ]
+            },
             "RunShieldRequest": {
                 "type": "object",
                 "properties": {
-                    "shield_type": {
+                    "identifier": {
                         "type": "string"
                     },
                     "messages": {
@@ -6628,7 +6608,7 @@
                 },
                 "additionalProperties": false,
                 "required": [
-                    "shield_type",
+                    "identifier",
                     "messages",
                     "params"
                 ]
@@ -6674,9 +6654,23 @@
                         }
                     },
                     "scoring_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/RegexParserScoringFnParams"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "type": "null"
+                                }
+                            ]
                         }
                     }
                 },
@@ -6708,9 +6702,23 @@
                         "type": "string"
                     },
                     "scoring_functions": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/RegexParserScoringFnParams"
+                                        }
+                                    ]
+                                },
+                                {
+                                    "type": "null"
+                                }
+                            ]
                         }
                     },
                     "save_results_dataset": {
@@ -7063,13 +7071,19 @@
     ],
     "tags": [
         {
-            "name": "Memory"
+            "name": "ScoringFunctions"
         },
         {
-            "name": "Inference"
+            "name": "Agents"
         },
         {
-            "name": "Eval"
+            "name": "Shields"
+        },
+        {
+            "name": "Telemetry"
+        },
+        {
+            "name": "Safety"
         },
         {
             "name": "MemoryBanks"
@@ -7084,16 +7098,10 @@
             "name": "PostTraining"
         },
         {
-            "name": "Agents"
+            "name": "Inference"
         },
         {
-            "name": "Shields"
-        },
-        {
-            "name": "Telemetry"
-        },
-        {
-            "name": "Inspect"
+            "name": "Datasets"
         },
         {
             "name": "DatasetIO"
@@ -7102,17 +7110,17 @@
             "name": "SyntheticDataGeneration"
         },
         {
-            "name": "Datasets"
+            "name": "Memory"
+        },
+        {
+            "name": "Eval"
+        },
+        {
+            "name": "Inspect"
         },
         {
             "name": "Scoring"
         },
-        {
-            "name": "ScoringFunctions"
-        },
-        {
-            "name": "Safety"
-        },
         {
             "name": "BuiltinTool",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@@ -7377,13 +7385,29 @@
             "name": "AgentCandidate",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentCandidate\" />"
         },
+        {
+            "name": "AppEvalTaskConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AppEvalTaskConfig\" />"
+        },
+        {
+            "name": "BenchmarkEvalTaskConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BenchmarkEvalTaskConfig\" />"
+        },
+        {
+            "name": "LLMAsJudgeScoringFnParams",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/LLMAsJudgeScoringFnParams\" />"
+        },
         {
             "name": "ModelCandidate",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ModelCandidate\" />"
         },
         {
-            "name": "EvaluateRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateRequest\" />"
+            "name": "RegexParserScoringFnParams",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegexParserScoringFnParams\" />"
+        },
+        {
+            "name": "EvaluateRowsRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateRowsRequest\" />"
         },
         {
             "name": "EvaluateResponse",
@@ -7393,14 +7417,6 @@
             "name": "ScoringResult",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoringResult\" />"
         },
-        {
-            "name": "EvaluateBatchRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateBatchRequest\" />"
-        },
-        {
-            "name": "Job",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Job\" />"
-        },
         {
             "name": "GetAgentsSessionRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/GetAgentsSessionRequest\" />"
@@ -7441,10 +7457,6 @@
             "name": "PaginatedRowsResult",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PaginatedRowsResult\" />"
         },
-        {
-            "name": "Parameter",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Parameter\" />"
-        },
         {
             "name": "ScoringFnDefWithProvider",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoringFnDefWithProvider\" />"
@@ -7589,6 +7601,14 @@
             "name": "RegisterShieldRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterShieldRequest\" />"
         },
+        {
+            "name": "RunEvalRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RunEvalRequest\" />"
+        },
+        {
+            "name": "Job",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Job\" />"
+        },
         {
             "name": "RunShieldRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RunShieldRequest\" />"
@@ -7680,11 +7700,13 @@
                 "AgentTurnResponseStreamChunk",
                 "AgentTurnResponseTurnCompletePayload",
                 "AgentTurnResponseTurnStartPayload",
+                "AppEvalTaskConfig",
                 "Attachment",
                 "BatchChatCompletionRequest",
                 "BatchChatCompletionResponse",
                 "BatchCompletionRequest",
                 "BatchCompletionResponse",
+                "BenchmarkEvalTaskConfig",
                 "BuiltinTool",
                 "CancelTrainingJobRequest",
                 "ChatCompletionRequest",
@@ -7708,9 +7730,8 @@
                 "DoraFinetuningConfig",
                 "EmbeddingsRequest",
                 "EmbeddingsResponse",
-                "EvaluateBatchRequest",
-                "EvaluateRequest",
                 "EvaluateResponse",
+                "EvaluateRowsRequest",
                 "FinetuningAlgorithm",
                 "FunctionCallToolDefinition",
                 "GetAgentsSessionRequest",
@@ -7724,6 +7745,7 @@
                 "JobStatus",
                 "KeyValueMemoryBankDef",
                 "KeywordMemoryBankDef",
+                "LLMAsJudgeScoringFnParams",
                 "LogEventRequest",
                 "LogSeverity",
                 "LoraFinetuningConfig",
@@ -7735,7 +7757,6 @@
                 "ModelDefWithProvider",
                 "OptimizerConfig",
                 "PaginatedRowsResult",
-                "Parameter",
                 "PhotogenToolDefinition",
                 "PostTrainingJob",
                 "PostTrainingJobArtifactsResponse",
@@ -7748,6 +7769,7 @@
                 "QueryDocumentsRequest",
                 "QueryDocumentsResponse",
                 "RLHFAlgorithm",
+                "RegexParserScoringFnParams",
                 "RegisterDatasetRequest",
                 "RegisterMemoryBankRequest",
                 "RegisterModelRequest",
@@ -7756,6 +7778,7 @@
                 "RestAPIExecutionConfig",
                 "RestAPIMethod",
                 "RouteInfo",
+                "RunEvalRequest",
                 "RunShieldRequest",
                 "RunShieldResponse",
                 "SafetyViolation",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 7dd231965..f839e7bc0 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -218,6 +218,30 @@ components:
       - event_type
       - turn_id
       type: object
+    AppEvalTaskConfig:
+      additionalProperties: false
+      properties:
+        eval_candidate:
+          oneOf:
+          - $ref: '#/components/schemas/ModelCandidate'
+          - $ref: '#/components/schemas/AgentCandidate'
+        num_examples:
+          type: integer
+        scoring_params:
+          additionalProperties:
+            oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+          type: object
+        type:
+          const: app
+          default: app
+          type: string
+      required:
+      - type
+      - eval_candidate
+      - scoring_params
+      type: object
     Attachment:
       additionalProperties: false
       properties:
@@ -322,6 +346,23 @@ components:
       required:
       - completion_message_batch
       type: object
+    BenchmarkEvalTaskConfig:
+      additionalProperties: false
+      properties:
+        eval_candidate:
+          oneOf:
+          - $ref: '#/components/schemas/ModelCandidate'
+          - $ref: '#/components/schemas/AgentCandidate'
+        num_examples:
+          type: integer
+        type:
+          const: benchmark
+          default: benchmark
+          type: string
+      required:
+      - type
+      - eval_candidate
+      type: object
     BuiltinTool:
       enum:
       - brave_search
@@ -790,6 +831,10 @@ components:
           type: object
         provider_id:
           type: string
+        type:
+          const: dataset
+          default: dataset
+          type: string
         url:
           $ref: '#/components/schemas/URL'
       required:
@@ -797,6 +842,7 @@ components:
       - dataset_schema
       - url
       - metadata
+      - type
       - provider_id
       type: object
     DeleteAgentsRequest:
@@ -872,52 +918,6 @@ components:
       required:
       - embeddings
       type: object
-    EvaluateBatchRequest:
-      additionalProperties: false
-      properties:
-        candidate:
-          oneOf:
-          - $ref: '#/components/schemas/ModelCandidate'
-          - $ref: '#/components/schemas/AgentCandidate'
-        dataset_id:
-          type: string
-        scoring_functions:
-          items:
-            type: string
-          type: array
-      required:
-      - dataset_id
-      - candidate
-      - scoring_functions
-      type: object
-    EvaluateRequest:
-      additionalProperties: false
-      properties:
-        candidate:
-          oneOf:
-          - $ref: '#/components/schemas/ModelCandidate'
-          - $ref: '#/components/schemas/AgentCandidate'
-        input_rows:
-          items:
-            additionalProperties:
-              oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-            type: object
-          type: array
-        scoring_functions:
-          items:
-            type: string
-          type: array
-      required:
-      - input_rows
-      - candidate
-      - scoring_functions
-      type: object
     EvaluateResponse:
       additionalProperties: false
       properties:
@@ -941,6 +941,37 @@ components:
       - generations
       - scores
       type: object
+    EvaluateRowsRequest:
+      additionalProperties: false
+      properties:
+        input_rows:
+          items:
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
+          type: array
+        scoring_functions:
+          items:
+            type: string
+          type: array
+        task_config:
+          oneOf:
+          - $ref: '#/components/schemas/BenchmarkEvalTaskConfig'
+          - $ref: '#/components/schemas/AppEvalTaskConfig'
+        task_id:
+          type: string
+      required:
+      - task_id
+      - input_rows
+      - scoring_functions
+      - task_config
+      type: object
     FinetuningAlgorithm:
       enum:
       - full
@@ -1082,7 +1113,10 @@ components:
       properties:
         job_id:
           type: string
+        task_id:
+          type: string
       required:
+      - task_id
       - job_id
       type: object
     JobStatus:
@@ -1124,6 +1158,25 @@ components:
       - provider_id
       - type
       type: object
+    LLMAsJudgeScoringFnParams:
+      additionalProperties: false
+      properties:
+        judge_model:
+          type: string
+        judge_score_regexes:
+          items:
+            type: string
+          type: array
+        prompt_template:
+          type: string
+        type:
+          const: llm_as_judge
+          default: llm_as_judge
+          type: string
+      required:
+      - type
+      - judge_model
+      type: object
     LogEventRequest:
       additionalProperties: false
       properties:
@@ -1442,10 +1495,15 @@ components:
           type: object
         provider_id:
           type: string
+        type:
+          const: model
+          default: model
+          type: string
       required:
       - identifier
       - llama_model
       - metadata
+      - type
       - provider_id
       type: object
     OptimizerConfig:
@@ -1492,109 +1550,6 @@ components:
       - rows
       - total_count
       type: object
-    Parameter:
-      additionalProperties: false
-      properties:
-        description:
-          type: string
-        name:
-          type: string
-        type:
-          oneOf:
-          - additionalProperties: false
-            properties:
-              type:
-                const: string
-                default: string
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: number
-                default: number
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: boolean
-                default: boolean
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: array
-                default: array
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: object
-                default: object
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: json
-                default: json
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: union
-                default: union
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: chat_completion_input
-                default: chat_completion_input
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: completion_input
-                default: completion_input
-                type: string
-            required:
-            - type
-            type: object
-          - additionalProperties: false
-            properties:
-              type:
-                const: agent_turn_input
-                default: agent_turn_input
-                type: string
-            required:
-            - type
-            type: object
-      required:
-      - name
-      - type
-      type: object
     PhotogenToolDefinition:
       additionalProperties: false
       properties:
@@ -1844,6 +1799,20 @@ components:
       enum:
       - dpo
       type: string
+    RegexParserScoringFnParams:
+      additionalProperties: false
+      properties:
+        parsing_regexes:
+          items:
+            type: string
+          type: array
+        type:
+          const: regex_parser
+          default: regex_parser
+          type: string
+      required:
+      - type
+      type: object
     RegisterDatasetRequest:
       additionalProperties: false
       properties:
@@ -1952,9 +1921,24 @@ components:
       - method
       - provider_types
       type: object
+    RunEvalRequest:
+      additionalProperties: false
+      properties:
+        task_config:
+          oneOf:
+          - $ref: '#/components/schemas/BenchmarkEvalTaskConfig'
+          - $ref: '#/components/schemas/AppEvalTaskConfig'
+        task_id:
+          type: string
+      required:
+      - task_id
+      - task_config
+      type: object
     RunShieldRequest:
       additionalProperties: false
       properties:
+        identifier:
+          type: string
         messages:
           items:
             oneOf:
@@ -1973,10 +1957,8 @@ components:
             - type: array
             - type: object
           type: object
-        shield_type:
-          type: string
       required:
-      - shield_type
+      - identifier
       - messages
       - params
       type: object
@@ -2045,9 +2027,13 @@ components:
         save_results_dataset:
           type: boolean
         scoring_functions:
-          items:
-            type: string
-          type: array
+          additionalProperties:
+            oneOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+            - type: 'null'
+          type: object
       required:
       - dataset_id
       - scoring_functions
@@ -2081,9 +2067,13 @@ components:
             type: object
           type: array
         scoring_functions:
-          items:
-            type: string
-          type: array
+          additionalProperties:
+            oneOf:
+            - oneOf:
+              - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              - $ref: '#/components/schemas/RegexParserScoringFnParams'
+            - type: 'null'
+          type: object
       required:
       - input_rows
       - scoring_functions
@@ -2101,20 +2091,6 @@ components:
     ScoringFnDefWithProvider:
       additionalProperties: false
       properties:
-        context:
-          additionalProperties: false
-          properties:
-            judge_model:
-              type: string
-            judge_score_regex:
-              items:
-                type: string
-              type: array
-            prompt_template:
-              type: string
-          required:
-          - judge_model
-          type: object
         description:
           type: string
         identifier:
@@ -2129,10 +2105,10 @@ components:
             - type: array
             - type: object
           type: object
-        parameters:
-          items:
-            $ref: '#/components/schemas/Parameter'
-          type: array
+        params:
+          oneOf:
+          - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+          - $ref: '#/components/schemas/RegexParserScoringFnParams'
         provider_id:
           type: string
         return_type:
@@ -2227,11 +2203,15 @@ components:
             required:
             - type
             type: object
+        type:
+          const: scoring_fn
+          default: scoring_fn
+          type: string
       required:
       - identifier
       - metadata
-      - parameters
       - return_type
+      - type
       - provider_id
       type: object
     ScoringResult:
@@ -2361,12 +2341,17 @@ components:
           type: object
         provider_id:
           type: string
+        shield_type:
+          type: string
         type:
+          const: shield
+          default: shield
           type: string
       required:
       - identifier
-      - type
+      - shield_type
       - params
+      - type
       - provider_id
       type: object
     SpanEndPayload:
@@ -2998,7 +2983,7 @@ info:
   description: "This is the specification of the llama stack that provides\n     \
     \           a set of endpoints and their corresponding interfaces that are tailored\
     \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-10-31 14:28:52.128905"
+    \ draft and subject to change.\n                Generated at 2024-11-07 22:26:27.169134"
   title: '[DRAFT] Llama Stack Specification'
   version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3387,7 +3372,7 @@ paths:
           description: OK
       tags:
       - Datasets
-  /eval/evaluate:
+  /eval/evaluate_rows:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -3401,7 +3386,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/EvaluateRequest'
+              $ref: '#/components/schemas/EvaluateRowsRequest'
         required: true
       responses:
         '200':
@@ -3412,31 +3397,6 @@ paths:
           description: OK
       tags:
       - Eval
-  /eval/evaluate_batch:
-    post:
-      parameters:
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EvaluateBatchRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Job'
-          description: OK
-      tags:
-      - Eval
   /eval/job/cancel:
     post:
       parameters:
@@ -3461,6 +3421,11 @@ paths:
   /eval/job/result:
     get:
       parameters:
+      - in: query
+        name: task_id
+        required: true
+        schema:
+          type: string
       - in: query
         name: job_id
         required: true
@@ -3485,6 +3450,11 @@ paths:
   /eval/job/status:
     get:
       parameters:
+      - in: query
+        name: task_id
+        required: true
+        schema:
+          type: string
       - in: query
         name: job_id
         required: true
@@ -3508,6 +3478,31 @@ paths:
           description: OK
       tags:
       - Eval
+  /eval/run_eval:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RunEvalRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Job'
+          description: OK
+      tags:
+      - Eval
   /health:
     get:
       parameters:
@@ -4143,7 +4138,7 @@ paths:
     get:
       parameters:
       - in: query
-        name: shield_type
+        name: identifier
         required: true
         schema:
           type: string
@@ -4280,23 +4275,23 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
-- name: Memory
-- name: Inference
-- name: Eval
+- name: ScoringFunctions
+- name: Agents
+- name: Shields
+- name: Telemetry
+- name: Safety
 - name: MemoryBanks
 - name: Models
 - name: BatchInference
 - name: PostTraining
-- name: Agents
-- name: Shields
-- name: Telemetry
-- name: Inspect
+- name: Inference
+- name: Datasets
 - name: DatasetIO
 - name: SyntheticDataGeneration
-- name: Datasets
+- name: Memory
+- name: Eval
+- name: Inspect
 - name: Scoring
-- name: ScoringFunctions
-- name: Safety
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
   name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
@@ -4503,21 +4498,28 @@ tags:
   name: EmbeddingsResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgentCandidate" />
   name: AgentCandidate
+- description: <SchemaDefinition schemaRef="#/components/schemas/AppEvalTaskConfig"
+    />
+  name: AppEvalTaskConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/BenchmarkEvalTaskConfig"
+    />
+  name: BenchmarkEvalTaskConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/LLMAsJudgeScoringFnParams"
+    />
+  name: LLMAsJudgeScoringFnParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/ModelCandidate" />
   name: ModelCandidate
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateRequest"
+- description: <SchemaDefinition schemaRef="#/components/schemas/RegexParserScoringFnParams"
     />
-  name: EvaluateRequest
+  name: RegexParserScoringFnParams
+- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateRowsRequest"
+    />
+  name: EvaluateRowsRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateResponse"
     />
   name: EvaluateResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoringResult" />
   name: ScoringResult
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateBatchRequest"
-    />
-  name: EvaluateBatchRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/Job" />
-  name: Job
 - description: <SchemaDefinition schemaRef="#/components/schemas/GetAgentsSessionRequest"
     />
   name: GetAgentsSessionRequest
@@ -4550,8 +4552,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/PaginatedRowsResult"
     />
   name: PaginatedRowsResult
-- description: <SchemaDefinition schemaRef="#/components/schemas/Parameter" />
-  name: Parameter
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoringFnDefWithProvider"
     />
   name: ScoringFnDefWithProvider
@@ -4659,6 +4659,10 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/RegisterShieldRequest"
     />
   name: RegisterShieldRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/RunEvalRequest" />
+  name: RunEvalRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/Job" />
+  name: Job
 - description: <SchemaDefinition schemaRef="#/components/schemas/RunShieldRequest"
     />
   name: RunShieldRequest
@@ -4734,11 +4738,13 @@ x-tagGroups:
   - AgentTurnResponseStreamChunk
   - AgentTurnResponseTurnCompletePayload
   - AgentTurnResponseTurnStartPayload
+  - AppEvalTaskConfig
   - Attachment
   - BatchChatCompletionRequest
   - BatchChatCompletionResponse
   - BatchCompletionRequest
   - BatchCompletionResponse
+  - BenchmarkEvalTaskConfig
   - BuiltinTool
   - CancelTrainingJobRequest
   - ChatCompletionRequest
@@ -4762,9 +4768,8 @@ x-tagGroups:
   - DoraFinetuningConfig
   - EmbeddingsRequest
   - EmbeddingsResponse
-  - EvaluateBatchRequest
-  - EvaluateRequest
   - EvaluateResponse
+  - EvaluateRowsRequest
   - FinetuningAlgorithm
   - FunctionCallToolDefinition
   - GetAgentsSessionRequest
@@ -4778,6 +4783,7 @@ x-tagGroups:
   - JobStatus
   - KeyValueMemoryBankDef
   - KeywordMemoryBankDef
+  - LLMAsJudgeScoringFnParams
   - LogEventRequest
   - LogSeverity
   - LoraFinetuningConfig
@@ -4789,7 +4795,6 @@ x-tagGroups:
   - ModelDefWithProvider
   - OptimizerConfig
   - PaginatedRowsResult
-  - Parameter
   - PhotogenToolDefinition
   - PostTrainingJob
   - PostTrainingJobArtifactsResponse
@@ -4802,6 +4807,7 @@ x-tagGroups:
   - QueryDocumentsRequest
   - QueryDocumentsResponse
   - RLHFAlgorithm
+  - RegexParserScoringFnParams
   - RegisterDatasetRequest
   - RegisterMemoryBankRequest
   - RegisterModelRequest
@@ -4810,6 +4816,7 @@ x-tagGroups:
   - RestAPIExecutionConfig
   - RestAPIMethod
   - RouteInfo
+  - RunEvalRequest
   - RunShieldRequest
   - RunShieldResponse
   - SafetyViolation