diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 871c01a80..f9f56119b 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -33,14 +33,16 @@ schema_utils.json_schema_type = json_schema_type
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_stack.apis.agents import *  # noqa: F403
-from llama_stack.apis.dataset import *  # noqa: F403
-from llama_stack.apis.evals import *  # noqa: F403
+from llama_stack.apis.datasets import *  # noqa: F403
+from llama_stack.apis.datasetio import *  # noqa: F403
+from llama_stack.apis.scoring import *  # noqa: F403
+from llama_stack.apis.scoring_functions import *  # noqa: F403
+from llama_stack.apis.eval import *  # noqa: F403
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.apis.batch_inference import *  # noqa: F403
 from llama_stack.apis.memory import *  # noqa: F403
 from llama_stack.apis.telemetry import *  # noqa: F403
 from llama_stack.apis.post_training import *  # noqa: F403
-from llama_stack.apis.reward_scoring import *  # noqa: F403
 from llama_stack.apis.synthetic_data_generation import *  # noqa: F403
 from llama_stack.apis.safety import *  # noqa: F403
 from llama_stack.apis.models import *  # noqa: F403
@@ -54,14 +56,16 @@ class LlamaStack(
     Inference,
     BatchInference,
     Agents,
-    RewardScoring,
     Safety,
     SyntheticDataGeneration,
     Datasets,
     Telemetry,
     PostTraining,
     Memory,
-    Evaluations,
+    Eval,
+    Scoring,
+    ScoringFunctions,
+    DatasetIO,
     Models,
     Shields,
     Inspect,
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 8e6683931..886634fba 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
     "info": {
         "title": "[DRAFT] Llama Stack Specification",
         "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-18 20:48:17.730988"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-24 17:40:59.576117"
     },
     "servers": [
         {
@@ -109,39 +109,6 @@
                 }
             }
         },
-        "/evaluate/job/cancel": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/CancelEvaluationJobRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/post_training/job/cancel": {
             "post": {
                 "responses": {
@@ -389,39 +356,6 @@
                 }
             }
         },
-        "/datasets/create": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "Datasets"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/CreateDatasetRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/agents/delete": {
             "post": {
                 "responses": {
@@ -488,39 +422,6 @@
                 }
             }
         },
-        "/datasets/delete": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "Datasets"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/DeleteDatasetRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/inference/embeddings": {
             "post": {
                 "responses": {
@@ -561,7 +462,7 @@
                 }
             }
         },
-        "/evaluate/question_answering/": {
+        "/eval/evaluate": {
             "post": {
                 "responses": {
                     "200": {
@@ -569,14 +470,14 @@
                         "content": {
                             "application/json": {
                                 "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJob"
+                                    "$ref": "#/components/schemas/EvaluateResponse"
                                 }
                             }
                         }
                     }
                 },
                 "tags": [
-                    "Evaluations"
+                    "Eval"
                 ],
                 "parameters": [
                     {
@@ -593,7 +494,7 @@
                     "content": {
                         "application/json": {
                             "schema": {
-                                "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest"
+                                "$ref": "#/components/schemas/EvaluateRequest"
                             }
                         }
                     },
@@ -601,7 +502,7 @@
                 }
             }
         },
-        "/evaluate/summarization/": {
+        "/eval/evaluate_batch": {
             "post": {
                 "responses": {
                     "200": {
@@ -609,14 +510,14 @@
                         "content": {
                             "application/json": {
                                 "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJob"
+                                    "$ref": "#/components/schemas/Job"
                                 }
                             }
                         }
                     }
                 },
                 "tags": [
-                    "Evaluations"
+                    "Eval"
                 ],
                 "parameters": [
                     {
@@ -633,47 +534,7 @@
                     "content": {
                         "application/json": {
                             "schema": {
-                                "$ref": "#/components/schemas/EvaluateSummarizationRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/evaluate/text_generation/": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJob"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/EvaluateTextGenerationRequest"
+                                "$ref": "#/components/schemas/EvaluateBatchRequest"
                             }
                         }
                     },
@@ -763,6 +624,14 @@
                             "type": "string"
                         }
                     },
+                    {
+                        "name": "session_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
                     {
                         "name": "turn_id",
                         "in": "query",
@@ -817,6 +686,14 @@
                             "type": "string"
                         }
                     },
+                    {
+                        "name": "session_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
                     {
                         "name": "turn_id",
                         "in": "query",
@@ -845,7 +722,14 @@
                         "content": {
                             "application/json": {
                                 "schema": {
-                                    "$ref": "#/components/schemas/TrainEvalDataset"
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/DatasetDefWithProvider"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
                                 }
                             }
                         }
@@ -856,7 +740,7 @@
                 ],
                 "parameters": [
                     {
-                        "name": "dataset_uuid",
+                        "name": "dataset_identifier",
                         "in": "query",
                         "required": true,
                         "schema": {
@@ -875,150 +759,6 @@
                 ]
             }
         },
-        "/evaluate/job/artifacts": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJobArtifactsResponse"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "job_uuid",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
-        "/evaluate/job/logs": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJobLogStream"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "job_uuid",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
-        "/evaluate/job/status": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJobStatusResponse"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "job_uuid",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
-        "/evaluate/jobs": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/jsonl": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/EvaluationJob"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
         "/memory_banks/get": {
             "get": {
                 "responses": {
@@ -1122,6 +862,113 @@
                 ]
             }
         },
+        "/datasetio/get_rows_paginated": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PaginatedRowsResult"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "DatasetIO"
+                ],
+                "parameters": [
+                    {
+                        "name": "dataset_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "rows_in_page",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    },
+                    {
+                        "name": "page_token",
+                        "in": "query",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "filter_condition",
+                        "in": "query",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
+        "/scoring_functions/get": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "ScoringFunctions"
+                ],
+                "parameters": [
+                    {
+                        "name": "name",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/shields/get": {
             "get": {
                 "responses": {
@@ -1412,6 +1259,152 @@
                 }
             }
         },
+        "/eval/job/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/JobCancelRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/eval/job/result": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/EvaluateResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "parameters": [
+                    {
+                        "name": "job_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
+        "/eval/job/status": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/JobStatus"
+                                        },
+                                        {
+                                            "type": "null"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Eval"
+                ],
+                "parameters": [
+                    {
+                        "name": "job_id",
+                        "in": "query",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
+        "/datasets/list": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/DatasetDefWithProvider"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Datasets"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/memory_banks/list": {
             "get": {
                 "responses": {
@@ -1554,6 +1547,36 @@
                 ]
             }
         },
+        "/scoring_functions/list": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/jsonl": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "ScoringFunctions"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ]
+            }
+        },
         "/shields/list": {
             "get": {
                 "responses": {
@@ -1697,6 +1720,39 @@
                 }
             }
         },
+        "/datasets/register": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "Datasets"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RegisterDatasetRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/memory_banks/register": {
             "post": {
                 "responses": {
@@ -1763,6 +1819,39 @@
                 }
             }
         },
+        "/scoring_functions/register": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "ScoringFunctions"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RegisterScoringFunctionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/shields/register": {
             "post": {
                 "responses": {
@@ -1796,46 +1885,6 @@
                 }
             }
         },
-        "/reward_scoring/score": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/RewardScoringResponse"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "RewardScoring"
-                ],
-                "parameters": [
-                    {
-                        "name": "X-LlamaStack-ProviderData",
-                        "in": "header",
-                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
-                        "required": false,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/RewardScoreRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/safety/run_shield": {
             "post": {
                 "responses": {
@@ -1876,6 +1925,86 @@
                 }
             }
         },
+        "/scoring/score": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ScoreResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Scoring"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/ScoreRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/scoring/score_batch": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ScoreBatchResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Scoring"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/ScoreBatchRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
         "/post_training/supervised_fine_tune": {
             "post": {
                 "responses": {
@@ -2571,18 +2700,6 @@
                     "completion_message_batch"
                 ]
             },
-            "CancelEvaluationJobRequest": {
-                "type": "object",
-                "properties": {
-                    "job_uuid": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "job_uuid"
-                ]
-            },
             "CancelTrainingJobRequest": {
                 "type": "object",
                 "properties": {
@@ -2635,6 +2752,90 @@
                     "tool_prompt_format": {
                         "$ref": "#/components/schemas/ToolPromptFormat"
                     },
+                    "response_format": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "json_schema",
+                                        "default": "json_schema"
+                                    },
+                                    "schema": {
+                                        "type": "object",
+                                        "additionalProperties": {
+                                            "oneOf": [
+                                                {
+                                                    "type": "null"
+                                                },
+                                                {
+                                                    "type": "boolean"
+                                                },
+                                                {
+                                                    "type": "number"
+                                                },
+                                                {
+                                                    "type": "string"
+                                                },
+                                                {
+                                                    "type": "array"
+                                                },
+                                                {
+                                                    "type": "object"
+                                                }
+                                            ]
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "schema"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "grammar",
+                                        "default": "grammar"
+                                    },
+                                    "bnf": {
+                                        "type": "object",
+                                        "additionalProperties": {
+                                            "oneOf": [
+                                                {
+                                                    "type": "null"
+                                                },
+                                                {
+                                                    "type": "boolean"
+                                                },
+                                                {
+                                                    "type": "number"
+                                                },
+                                                {
+                                                    "type": "string"
+                                                },
+                                                {
+                                                    "type": "array"
+                                                },
+                                                {
+                                                    "type": "object"
+                                                }
+                                            ]
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "bnf"
+                                ]
+                            }
+                        ]
+                    },
                     "stream": {
                         "type": "boolean"
                     },
@@ -2807,6 +3008,90 @@
                     "sampling_params": {
                         "$ref": "#/components/schemas/SamplingParams"
                     },
+                    "response_format": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "json_schema",
+                                        "default": "json_schema"
+                                    },
+                                    "schema": {
+                                        "type": "object",
+                                        "additionalProperties": {
+                                            "oneOf": [
+                                                {
+                                                    "type": "null"
+                                                },
+                                                {
+                                                    "type": "boolean"
+                                                },
+                                                {
+                                                    "type": "number"
+                                                },
+                                                {
+                                                    "type": "string"
+                                                },
+                                                {
+                                                    "type": "array"
+                                                },
+                                                {
+                                                    "type": "object"
+                                                }
+                                            ]
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "schema"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "grammar",
+                                        "default": "grammar"
+                                    },
+                                    "bnf": {
+                                        "type": "object",
+                                        "additionalProperties": {
+                                            "oneOf": [
+                                                {
+                                                    "type": "null"
+                                                },
+                                                {
+                                                    "type": "boolean"
+                                                },
+                                                {
+                                                    "type": "number"
+                                                },
+                                                {
+                                                    "type": "string"
+                                                },
+                                                {
+                                                    "type": "array"
+                                                },
+                                                {
+                                                    "type": "object"
+                                                }
+                                            ]
+                                        }
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "bnf"
+                                ]
+                            }
+                        ]
+                    },
                     "stream": {
                         "type": "boolean"
                     },
@@ -4094,77 +4379,6 @@
                     "error"
                 ]
             },
-            "TrainEvalDataset": {
-                "type": "object",
-                "properties": {
-                    "columns": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "$ref": "#/components/schemas/TrainEvalDatasetColumnType"
-                        }
-                    },
-                    "content_url": {
-                        "$ref": "#/components/schemas/URL"
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "oneOf": [
-                                {
-                                    "type": "null"
-                                },
-                                {
-                                    "type": "boolean"
-                                },
-                                {
-                                    "type": "number"
-                                },
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "type": "array"
-                                },
-                                {
-                                    "type": "object"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "columns",
-                    "content_url"
-                ],
-                "title": "Dataset to be used for training or evaluating language models."
-            },
-            "TrainEvalDatasetColumnType": {
-                "type": "string",
-                "enum": [
-                    "dialog",
-                    "text",
-                    "media",
-                    "number",
-                    "json"
-                ]
-            },
-            "CreateDatasetRequest": {
-                "type": "object",
-                "properties": {
-                    "uuid": {
-                        "type": "string"
-                    },
-                    "dataset": {
-                        "$ref": "#/components/schemas/TrainEvalDataset"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "uuid",
-                    "dataset"
-                ]
-            },
             "DeleteAgentsRequest": {
                 "type": "object",
                 "properties": {
@@ -4193,18 +4407,6 @@
                     "session_id"
                 ]
             },
-            "DeleteDatasetRequest": {
-                "type": "object",
-                "properties": {
-                    "dataset_uuid": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "dataset_uuid"
-                ]
-            },
             "EmbeddingsRequest": {
                 "type": "object",
                 "properties": {
@@ -4262,74 +4464,251 @@
                     "embeddings"
                 ]
             },
-            "EvaluateQuestionAnsweringRequest": {
+            "AgentCandidate": {
                 "type": "object",
                 "properties": {
-                    "metrics": {
+                    "type": {
+                        "type": "string",
+                        "const": "agent",
+                        "default": "agent"
+                    },
+                    "config": {
+                        "$ref": "#/components/schemas/AgentConfig"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "config"
+                ]
+            },
+            "ModelCandidate": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "const": "model",
+                        "default": "model"
+                    },
+                    "model": {
+                        "type": "string"
+                    },
+                    "sampling_params": {
+                        "$ref": "#/components/schemas/SamplingParams"
+                    },
+                    "system_message": {
+                        "$ref": "#/components/schemas/SystemMessage"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "type",
+                    "model",
+                    "sampling_params"
+                ]
+            },
+            "EvaluateRequest": {
+                "type": "object",
+                "properties": {
+                    "input_rows": {
                         "type": "array",
                         "items": {
-                            "type": "string",
-                            "enum": [
-                                "em",
-                                "f1"
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        }
+                    },
+                    "candidate": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/ModelCandidate"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AgentCandidate"
+                            }
+                        ]
+                    },
+                    "scoring_functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input_rows",
+                    "candidate",
+                    "scoring_functions"
+                ]
+            },
+            "EvaluateResponse": {
+                "type": "object",
+                "properties": {
+                    "generations": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        }
+                    },
+                    "scores": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ScoringResult"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "generations",
+                    "scores"
+                ]
+            },
+            "ScoringResult": {
+                "type": "object",
+                "properties": {
+                    "score_rows": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        }
+                    },
+                    "aggregated_results": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
                             ]
                         }
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "metrics"
+                    "score_rows",
+                    "aggregated_results"
                 ]
             },
-            "EvaluationJob": {
+            "EvaluateBatchRequest": {
                 "type": "object",
                 "properties": {
-                    "job_uuid": {
+                    "dataset_id": {
+                        "type": "string"
+                    },
+                    "candidate": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/ModelCandidate"
+                            },
+                            {
+                                "$ref": "#/components/schemas/AgentCandidate"
+                            }
+                        ]
+                    },
+                    "scoring_functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "dataset_id",
+                    "candidate",
+                    "scoring_functions"
+                ]
+            },
+            "Job": {
+                "type": "object",
+                "properties": {
+                    "job_id": {
                         "type": "string"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "job_uuid"
-                ]
-            },
-            "EvaluateSummarizationRequest": {
-                "type": "object",
-                "properties": {
-                    "metrics": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "rouge",
-                                "bleu"
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "metrics"
-                ]
-            },
-            "EvaluateTextGenerationRequest": {
-                "type": "object",
-                "properties": {
-                    "metrics": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": [
-                                "perplexity",
-                                "rouge",
-                                "bleu"
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "metrics"
+                    "job_id"
                 ]
             },
             "GetAgentsSessionRequest": {
@@ -4517,41 +4896,216 @@
                     "step"
                 ]
             },
-            "EvaluationJobArtifactsResponse": {
+            "DatasetDefWithProvider": {
                 "type": "object",
                 "properties": {
-                    "job_uuid": {
+                    "identifier": {
+                        "type": "string"
+                    },
+                    "dataset_schema": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "string",
+                                            "default": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "number",
+                                            "default": "number"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "boolean",
+                                            "default": "boolean"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "array",
+                                            "default": "array"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "object",
+                                            "default": "object"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "json",
+                                            "default": "json"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "union",
+                                            "default": "union"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "custom",
+                                            "default": "custom"
+                                        },
+                                        "validator_class": {
+                                            "type": "string"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type",
+                                        "validator_class"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "chat_completion_input",
+                                            "default": "chat_completion_input"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "completion_input",
+                                            "default": "completion_input"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                },
+                                {
+                                    "type": "object",
+                                    "properties": {
+                                        "type": {
+                                            "type": "string",
+                                            "const": "agent_turn_input",
+                                            "default": "agent_turn_input"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "type"
+                                    ]
+                                }
+                            ]
+                        }
+                    },
+                    "url": {
+                        "$ref": "#/components/schemas/URL"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    },
+                    "provider_id": {
                         "type": "string"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "job_uuid"
-                ],
-                "title": "Artifacts of a evaluation job."
-            },
-            "EvaluationJobLogStream": {
-                "type": "object",
-                "properties": {
-                    "job_uuid": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "job_uuid"
-                ]
-            },
-            "EvaluationJobStatusResponse": {
-                "type": "object",
-                "properties": {
-                    "job_uuid": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "job_uuid"
+                    "identifier",
+                    "dataset_schema",
+                    "url",
+                    "metadata",
+                    "provider_id"
                 ]
             },
             "ModelDefWithProvider": {
@@ -4600,6 +5154,458 @@
                     "provider_id"
                 ]
             },
+            "PaginatedRowsResult": {
+                "type": "object",
+                "properties": {
+                    "rows": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        }
+                    },
+                    "total_count": {
+                        "type": "integer"
+                    },
+                    "next_page_token": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "rows",
+                    "total_count"
+                ]
+            },
+            "Parameter": {
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "string",
+                                        "default": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "number",
+                                        "default": "number"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "boolean",
+                                        "default": "boolean"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "array",
+                                        "default": "array"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "object",
+                                        "default": "object"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "json",
+                                        "default": "json"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "union",
+                                        "default": "union"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "custom",
+                                        "default": "custom"
+                                    },
+                                    "validator_class": {
+                                        "type": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "validator_class"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "chat_completion_input",
+                                        "default": "chat_completion_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "completion_input",
+                                        "default": "completion_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "agent_turn_input",
+                                        "default": "agent_turn_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            }
+                        ]
+                    },
+                    "description": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "name",
+                    "type"
+                ]
+            },
+            "ScoringFunctionDefWithProvider": {
+                "type": "object",
+                "properties": {
+                    "identifier": {
+                        "type": "string"
+                    },
+                    "description": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    },
+                    "parameters": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/Parameter"
+                        }
+                    },
+                    "return_type": {
+                        "oneOf": [
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "string",
+                                        "default": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "number",
+                                        "default": "number"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "boolean",
+                                        "default": "boolean"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "array",
+                                        "default": "array"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "object",
+                                        "default": "object"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "json",
+                                        "default": "json"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "union",
+                                        "default": "union"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "custom",
+                                        "default": "custom"
+                                    },
+                                    "validator_class": {
+                                        "type": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type",
+                                    "validator_class"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "chat_completion_input",
+                                        "default": "chat_completion_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "completion_input",
+                                        "default": "completion_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            },
+                            {
+                                "type": "object",
+                                "properties": {
+                                    "type": {
+                                        "type": "string",
+                                        "const": "agent_turn_input",
+                                        "default": "agent_turn_input"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "type"
+                                ]
+                            }
+                        ]
+                    },
+                    "context": {
+                        "type": "object",
+                        "properties": {
+                            "judge_model": {
+                                "type": "string"
+                            },
+                            "prompt_template": {
+                                "type": "string"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "judge_model"
+                        ]
+                    },
+                    "provider_id": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "identifier",
+                    "metadata",
+                    "parameters",
+                    "return_type",
+                    "provider_id"
+                ]
+            },
             "ShieldDefWithProvider": {
                 "type": "object",
                 "properties": {
@@ -4898,6 +5904,25 @@
                     "documents"
                 ]
             },
+            "JobCancelRequest": {
+                "type": "object",
+                "properties": {
+                    "job_id": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "job_id"
+                ]
+            },
+            "JobStatus": {
+                "type": "string",
+                "enum": [
+                    "completed",
+                    "in_progress"
+                ]
+            },
             "ProviderInfo": {
                 "type": "object",
                 "properties": {
@@ -5315,10 +6340,10 @@
                         "$ref": "#/components/schemas/URL"
                     },
                     "dataset": {
-                        "$ref": "#/components/schemas/TrainEvalDataset"
+                        "type": "string"
                     },
                     "validation_dataset": {
-                        "$ref": "#/components/schemas/TrainEvalDataset"
+                        "type": "string"
                     },
                     "algorithm": {
                         "$ref": "#/components/schemas/RLHFAlgorithm"
@@ -5517,6 +6542,18 @@
                     "scores"
                 ]
             },
+            "RegisterDatasetRequest": {
+                "type": "object",
+                "properties": {
+                    "dataset_def": {
+                        "$ref": "#/components/schemas/DatasetDefWithProvider"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "dataset_def"
+                ]
+            },
             "RegisterMemoryBankRequest": {
                 "type": "object",
                 "properties": {
@@ -5554,6 +6591,18 @@
                     "model"
                 ]
             },
+            "RegisterScoringFunctionRequest": {
+                "type": "object",
+                "properties": {
+                    "function_def": {
+                        "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "function_def"
+                ]
+            },
             "RegisterShieldRequest": {
                 "type": "object",
                 "properties": {
@@ -5566,153 +6615,6 @@
                     "shield"
                 ]
             },
-            "DialogGenerations": {
-                "type": "object",
-                "properties": {
-                    "dialog": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "$ref": "#/components/schemas/UserMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/SystemMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/ToolResponseMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/CompletionMessage"
-                                }
-                            ]
-                        }
-                    },
-                    "sampled_generations": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "$ref": "#/components/schemas/UserMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/SystemMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/ToolResponseMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/CompletionMessage"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "dialog",
-                    "sampled_generations"
-                ]
-            },
-            "RewardScoreRequest": {
-                "type": "object",
-                "properties": {
-                    "dialog_generations": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/DialogGenerations"
-                        }
-                    },
-                    "model": {
-                        "type": "string"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "dialog_generations",
-                    "model"
-                ]
-            },
-            "RewardScoringResponse": {
-                "type": "object",
-                "properties": {
-                    "scored_generations": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ScoredDialogGenerations"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "scored_generations"
-                ],
-                "title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
-            },
-            "ScoredDialogGenerations": {
-                "type": "object",
-                "properties": {
-                    "dialog": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "$ref": "#/components/schemas/UserMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/SystemMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/ToolResponseMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/CompletionMessage"
-                                }
-                            ]
-                        }
-                    },
-                    "scored_generations": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ScoredMessage"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "dialog",
-                    "scored_generations"
-                ]
-            },
-            "ScoredMessage": {
-                "type": "object",
-                "properties": {
-                    "message": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/UserMessage"
-                            },
-                            {
-                                "$ref": "#/components/schemas/SystemMessage"
-                            },
-                            {
-                                "$ref": "#/components/schemas/ToolResponseMessage"
-                            },
-                            {
-                                "$ref": "#/components/schemas/CompletionMessage"
-                            }
-                        ]
-                    },
-                    "score": {
-                        "type": "number"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "message",
-                    "score"
-                ]
-            },
             "RunShieldRequest": {
                 "type": "object",
                 "properties": {
@@ -5780,6 +6682,106 @@
                 },
                 "additionalProperties": false
             },
+            "ScoreRequest": {
+                "type": "object",
+                "properties": {
+                    "input_rows": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
+                        }
+                    },
+                    "scoring_functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "input_rows",
+                    "scoring_functions"
+                ]
+            },
+            "ScoreResponse": {
+                "type": "object",
+                "properties": {
+                    "results": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ScoringResult"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "results"
+                ]
+            },
+            "ScoreBatchRequest": {
+                "type": "object",
+                "properties": {
+                    "dataset_id": {
+                        "type": "string"
+                    },
+                    "scoring_functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    },
+                    "save_results_dataset": {
+                        "type": "boolean"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "dataset_id",
+                    "scoring_functions",
+                    "save_results_dataset"
+                ]
+            },
+            "ScoreBatchResponse": {
+                "type": "object",
+                "properties": {
+                    "dataset_id": {
+                        "type": "string"
+                    },
+                    "results": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ScoringResult"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "results"
+                ]
+            },
             "DoraFinetuningConfig": {
                 "type": "object",
                 "properties": {
@@ -5892,10 +6894,10 @@
                         "type": "string"
                     },
                     "dataset": {
-                        "$ref": "#/components/schemas/TrainEvalDataset"
+                        "type": "string"
                     },
                     "validation_dataset": {
-                        "$ref": "#/components/schemas/TrainEvalDataset"
+                        "type": "string"
                     },
                     "algorithm": {
                         "$ref": "#/components/schemas/FinetuningAlgorithm"
@@ -6034,7 +7036,29 @@
                     "synthetic_data": {
                         "type": "array",
                         "items": {
-                            "$ref": "#/components/schemas/ScoredDialogGenerations"
+                            "type": "object",
+                            "additionalProperties": {
+                                "oneOf": [
+                                    {
+                                        "type": "null"
+                                    },
+                                    {
+                                        "type": "boolean"
+                                    },
+                                    {
+                                        "type": "number"
+                                    },
+                                    {
+                                        "type": "string"
+                                    },
+                                    {
+                                        "type": "array"
+                                    },
+                                    {
+                                        "type": "object"
+                                    }
+                                ]
+                            }
                         }
                     },
                     "statistics": {
@@ -6079,49 +7103,55 @@
     ],
     "tags": [
         {
-            "name": "Models"
+            "name": "Eval"
         },
         {
-            "name": "RewardScoring"
-        },
-        {
-            "name": "MemoryBanks"
-        },
-        {
-            "name": "Shields"
+            "name": "ScoringFunctions"
         },
         {
             "name": "SyntheticDataGeneration"
         },
-        {
-            "name": "Inference"
-        },
         {
             "name": "Inspect"
         },
-        {
-            "name": "BatchInference"
-        },
-        {
-            "name": "Memory"
-        },
-        {
-            "name": "Datasets"
-        },
-        {
-            "name": "Agents"
-        },
         {
             "name": "PostTraining"
         },
         {
-            "name": "Telemetry"
+            "name": "Models"
         },
         {
             "name": "Safety"
         },
         {
-            "name": "Evaluations"
+            "name": "MemoryBanks"
+        },
+        {
+            "name": "DatasetIO"
+        },
+        {
+            "name": "Memory"
+        },
+        {
+            "name": "Scoring"
+        },
+        {
+            "name": "Shields"
+        },
+        {
+            "name": "Datasets"
+        },
+        {
+            "name": "Inference"
+        },
+        {
+            "name": "Telemetry"
+        },
+        {
+            "name": "BatchInference"
+        },
+        {
+            "name": "Agents"
         },
         {
             "name": "BuiltinTool",
@@ -6199,10 +7229,6 @@
             "name": "BatchCompletionResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchCompletionResponse\" />"
         },
-        {
-            "name": "CancelEvaluationJobRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CancelEvaluationJobRequest\" />"
-        },
         {
             "name": "CancelTrainingJobRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CancelTrainingJobRequest\" />"
@@ -6371,18 +7397,6 @@
             "name": "ViolationLevel",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ViolationLevel\" />"
         },
-        {
-            "name": "TrainEvalDataset",
-            "description": "Dataset to be used for training or evaluating language models.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/TrainEvalDataset\" />"
-        },
-        {
-            "name": "TrainEvalDatasetColumnType",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/TrainEvalDatasetColumnType\" />"
-        },
-        {
-            "name": "CreateDatasetRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CreateDatasetRequest\" />"
-        },
         {
             "name": "DeleteAgentsRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsRequest\" />"
@@ -6391,10 +7405,6 @@
             "name": "DeleteAgentsSessionRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsSessionRequest\" />"
         },
-        {
-            "name": "DeleteDatasetRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteDatasetRequest\" />"
-        },
         {
             "name": "EmbeddingsRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsRequest\" />"
@@ -6404,20 +7414,32 @@
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsResponse\" />"
         },
         {
-            "name": "EvaluateQuestionAnsweringRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateQuestionAnsweringRequest\" />"
+            "name": "AgentCandidate",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentCandidate\" />"
         },
         {
-            "name": "EvaluationJob",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluationJob\" />"
+            "name": "ModelCandidate",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ModelCandidate\" />"
         },
         {
-            "name": "EvaluateSummarizationRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateSummarizationRequest\" />"
+            "name": "EvaluateRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateRequest\" />"
         },
         {
-            "name": "EvaluateTextGenerationRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateTextGenerationRequest\" />"
+            "name": "EvaluateResponse",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateResponse\" />"
+        },
+        {
+            "name": "ScoringResult",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoringResult\" />"
+        },
+        {
+            "name": "EvaluateBatchRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluateBatchRequest\" />"
+        },
+        {
+            "name": "Job",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Job\" />"
         },
         {
             "name": "GetAgentsSessionRequest",
@@ -6448,21 +7470,25 @@
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgentStepResponse\" />"
         },
         {
-            "name": "EvaluationJobArtifactsResponse",
-            "description": "Artifacts of a evaluation job.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/EvaluationJobArtifactsResponse\" />"
-        },
-        {
-            "name": "EvaluationJobLogStream",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluationJobLogStream\" />"
-        },
-        {
-            "name": "EvaluationJobStatusResponse",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EvaluationJobStatusResponse\" />"
+            "name": "DatasetDefWithProvider",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DatasetDefWithProvider\" />"
         },
         {
             "name": "ModelDefWithProvider",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ModelDefWithProvider\" />"
         },
+        {
+            "name": "PaginatedRowsResult",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PaginatedRowsResult\" />"
+        },
+        {
+            "name": "Parameter",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Parameter\" />"
+        },
+        {
+            "name": "ScoringFunctionDefWithProvider",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoringFunctionDefWithProvider\" />"
+        },
         {
             "name": "ShieldDefWithProvider",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldDefWithProvider\" />"
@@ -6507,6 +7533,14 @@
             "name": "InsertDocumentsRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InsertDocumentsRequest\" />"
         },
+        {
+            "name": "JobCancelRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/JobCancelRequest\" />"
+        },
+        {
+            "name": "JobStatus",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/JobStatus\" />"
+        },
         {
             "name": "ProviderInfo",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ProviderInfo\" />"
@@ -6575,6 +7609,10 @@
             "name": "QueryDocumentsResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QueryDocumentsResponse\" />"
         },
+        {
+            "name": "RegisterDatasetRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterDatasetRequest\" />"
+        },
         {
             "name": "RegisterMemoryBankRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterMemoryBankRequest\" />"
@@ -6583,30 +7621,14 @@
             "name": "RegisterModelRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterModelRequest\" />"
         },
+        {
+            "name": "RegisterScoringFunctionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterScoringFunctionRequest\" />"
+        },
         {
             "name": "RegisterShieldRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RegisterShieldRequest\" />"
         },
-        {
-            "name": "DialogGenerations",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DialogGenerations\" />"
-        },
-        {
-            "name": "RewardScoreRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoreRequest\" />"
-        },
-        {
-            "name": "RewardScoringResponse",
-            "description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
-        },
-        {
-            "name": "ScoredDialogGenerations",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoredDialogGenerations\" />"
-        },
-        {
-            "name": "ScoredMessage",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoredMessage\" />"
-        },
         {
             "name": "RunShieldRequest",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RunShieldRequest\" />"
@@ -6615,6 +7637,22 @@
             "name": "RunShieldResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RunShieldResponse\" />"
         },
+        {
+            "name": "ScoreRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoreRequest\" />"
+        },
+        {
+            "name": "ScoreResponse",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoreResponse\" />"
+        },
+        {
+            "name": "ScoreBatchRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoreBatchRequest\" />"
+        },
+        {
+            "name": "ScoreBatchResponse",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ScoreBatchResponse\" />"
+        },
         {
             "name": "DoraFinetuningConfig",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DoraFinetuningConfig\" />"
@@ -6650,16 +7688,18 @@
             "tags": [
                 "Agents",
                 "BatchInference",
+                "DatasetIO",
                 "Datasets",
-                "Evaluations",
+                "Eval",
                 "Inference",
                 "Inspect",
                 "Memory",
                 "MemoryBanks",
                 "Models",
                 "PostTraining",
-                "RewardScoring",
                 "Safety",
+                "Scoring",
+                "ScoringFunctions",
                 "Shields",
                 "SyntheticDataGeneration",
                 "Telemetry"
@@ -6668,6 +7708,7 @@
         {
             "name": "Types",
             "tags": [
+                "AgentCandidate",
                 "AgentConfig",
                 "AgentCreateResponse",
                 "AgentSessionCreateResponse",
@@ -6685,7 +7726,6 @@
                 "BatchCompletionRequest",
                 "BatchCompletionResponse",
                 "BuiltinTool",
-                "CancelEvaluationJobRequest",
                 "CancelTrainingJobRequest",
                 "ChatCompletionRequest",
                 "ChatCompletionResponse",
@@ -6701,22 +7741,16 @@
                 "CreateAgentRequest",
                 "CreateAgentSessionRequest",
                 "CreateAgentTurnRequest",
-                "CreateDatasetRequest",
                 "DPOAlignmentConfig",
+                "DatasetDefWithProvider",
                 "DeleteAgentsRequest",
                 "DeleteAgentsSessionRequest",
-                "DeleteDatasetRequest",
-                "DialogGenerations",
                 "DoraFinetuningConfig",
                 "EmbeddingsRequest",
                 "EmbeddingsResponse",
-                "EvaluateQuestionAnsweringRequest",
-                "EvaluateSummarizationRequest",
-                "EvaluateTextGenerationRequest",
-                "EvaluationJob",
-                "EvaluationJobArtifactsResponse",
-                "EvaluationJobLogStream",
-                "EvaluationJobStatusResponse",
+                "EvaluateBatchRequest",
+                "EvaluateRequest",
+                "EvaluateResponse",
                 "FinetuningAlgorithm",
                 "FunctionCallToolDefinition",
                 "GetAgentsSessionRequest",
@@ -6725,6 +7759,9 @@
                 "ImageMedia",
                 "InferenceStep",
                 "InsertDocumentsRequest",
+                "Job",
+                "JobCancelRequest",
+                "JobStatus",
                 "KeyValueMemoryBankDef",
                 "KeywordMemoryBankDef",
                 "LogEventRequest",
@@ -6734,8 +7771,11 @@
                 "MemoryRetrievalStep",
                 "MemoryToolDefinition",
                 "MetricEvent",
+                "ModelCandidate",
                 "ModelDefWithProvider",
                 "OptimizerConfig",
+                "PaginatedRowsResult",
+                "Parameter",
                 "PhotogenToolDefinition",
                 "PostTrainingJob",
                 "PostTrainingJobArtifactsResponse",
@@ -6748,21 +7788,25 @@
                 "QueryDocumentsRequest",
                 "QueryDocumentsResponse",
                 "RLHFAlgorithm",
+                "RegisterDatasetRequest",
                 "RegisterMemoryBankRequest",
                 "RegisterModelRequest",
+                "RegisterScoringFunctionRequest",
                 "RegisterShieldRequest",
                 "RestAPIExecutionConfig",
                 "RestAPIMethod",
-                "RewardScoreRequest",
-                "RewardScoringResponse",
                 "RouteInfo",
                 "RunShieldRequest",
                 "RunShieldResponse",
                 "SafetyViolation",
                 "SamplingParams",
                 "SamplingStrategy",
-                "ScoredDialogGenerations",
-                "ScoredMessage",
+                "ScoreBatchRequest",
+                "ScoreBatchResponse",
+                "ScoreRequest",
+                "ScoreResponse",
+                "ScoringFunctionDefWithProvider",
+                "ScoringResult",
                 "SearchToolDefinition",
                 "Session",
                 "ShieldCallStep",
@@ -6788,8 +7832,6 @@
                 "ToolResponse",
                 "ToolResponseMessage",
                 "Trace",
-                "TrainEvalDataset",
-                "TrainEvalDatasetColumnType",
                 "TrainingConfig",
                 "Turn",
                 "URL",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 906d3934a..9dcdbb028 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1,6 +1,19 @@
 components:
   responses: {}
   schemas:
+    AgentCandidate:
+      additionalProperties: false
+      properties:
+        config:
+          $ref: '#/components/schemas/AgentConfig'
+        type:
+          const: agent
+          default: agent
+          type: string
+      required:
+      - type
+      - config
+      type: object
     AgentConfig:
       additionalProperties: false
       properties:
@@ -315,14 +328,6 @@ components:
       - photogen
       - code_interpreter
       type: string
-    CancelEvaluationJobRequest:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
-      type: object
     CancelTrainingJobRequest:
       additionalProperties: false
       properties:
@@ -351,6 +356,48 @@ components:
           type: array
         model:
           type: string
+        response_format:
+          oneOf:
+          - additionalProperties: false
+            properties:
+              schema:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              type:
+                const: json_schema
+                default: json_schema
+                type: string
+            required:
+            - type
+            - schema
+            type: object
+          - additionalProperties: false
+            properties:
+              bnf:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              type:
+                const: grammar
+                default: grammar
+                type: string
+            required:
+            - type
+            - bnf
+            type: object
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
         stream:
@@ -490,6 +537,48 @@ components:
           type: object
         model:
           type: string
+        response_format:
+          oneOf:
+          - additionalProperties: false
+            properties:
+              schema:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              type:
+                const: json_schema
+                default: json_schema
+                type: string
+            required:
+            - type
+            - schema
+            type: object
+          - additionalProperties: false
+            properties:
+              bnf:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              type:
+                const: grammar
+                default: grammar
+                type: string
+            required:
+            - type
+            - bnf
+            type: object
         sampling_params:
           $ref: '#/components/schemas/SamplingParams'
         stream:
@@ -572,17 +661,6 @@ components:
       - session_id
       - messages
       type: object
-    CreateDatasetRequest:
-      additionalProperties: false
-      properties:
-        dataset:
-          $ref: '#/components/schemas/TrainEvalDataset'
-        uuid:
-          type: string
-      required:
-      - uuid
-      - dataset
-      type: object
     DPOAlignmentConfig:
       additionalProperties: false
       properties:
@@ -600,6 +678,138 @@ components:
       - epsilon
       - gamma
       type: object
+    DatasetDefWithProvider:
+      additionalProperties: false
+      properties:
+        dataset_schema:
+          additionalProperties:
+            oneOf:
+            - additionalProperties: false
+              properties:
+                type:
+                  const: string
+                  default: string
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: number
+                  default: number
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: boolean
+                  default: boolean
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: array
+                  default: array
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: object
+                  default: object
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: json
+                  default: json
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: union
+                  default: union
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: custom
+                  default: custom
+                  type: string
+                validator_class:
+                  type: string
+              required:
+              - type
+              - validator_class
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: chat_completion_input
+                  default: chat_completion_input
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: completion_input
+                  default: completion_input
+                  type: string
+              required:
+              - type
+              type: object
+            - additionalProperties: false
+              properties:
+                type:
+                  const: agent_turn_input
+                  default: agent_turn_input
+                  type: string
+              required:
+              - type
+              type: object
+          type: object
+        identifier:
+          type: string
+        metadata:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        provider_id:
+          type: string
+        url:
+          $ref: '#/components/schemas/URL'
+      required:
+      - identifier
+      - dataset_schema
+      - url
+      - metadata
+      - provider_id
+      type: object
     DeleteAgentsRequest:
       additionalProperties: false
       properties:
@@ -619,37 +829,6 @@ components:
       - agent_id
       - session_id
       type: object
-    DeleteDatasetRequest:
-      additionalProperties: false
-      properties:
-        dataset_uuid:
-          type: string
-      required:
-      - dataset_uuid
-      type: object
-    DialogGenerations:
-      additionalProperties: false
-      properties:
-        dialog:
-          items:
-            oneOf:
-            - $ref: '#/components/schemas/UserMessage'
-            - $ref: '#/components/schemas/SystemMessage'
-            - $ref: '#/components/schemas/ToolResponseMessage'
-            - $ref: '#/components/schemas/CompletionMessage'
-          type: array
-        sampled_generations:
-          items:
-            oneOf:
-            - $ref: '#/components/schemas/UserMessage'
-            - $ref: '#/components/schemas/SystemMessage'
-            - $ref: '#/components/schemas/ToolResponseMessage'
-            - $ref: '#/components/schemas/CompletionMessage'
-          type: array
-      required:
-      - dialog
-      - sampled_generations
-      type: object
     DoraFinetuningConfig:
       additionalProperties: false
       properties:
@@ -704,78 +883,74 @@ components:
       required:
       - embeddings
       type: object
-    EvaluateQuestionAnsweringRequest:
+    EvaluateBatchRequest:
       additionalProperties: false
       properties:
-        metrics:
+        candidate:
+          oneOf:
+          - $ref: '#/components/schemas/ModelCandidate'
+          - $ref: '#/components/schemas/AgentCandidate'
+        dataset_id:
+          type: string
+        scoring_functions:
           items:
-            enum:
-            - em
-            - f1
             type: string
           type: array
       required:
-      - metrics
+      - dataset_id
+      - candidate
+      - scoring_functions
       type: object
-    EvaluateSummarizationRequest:
+    EvaluateRequest:
       additionalProperties: false
       properties:
-        metrics:
+        candidate:
+          oneOf:
+          - $ref: '#/components/schemas/ModelCandidate'
+          - $ref: '#/components/schemas/AgentCandidate'
+        input_rows:
+          items:
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
+          type: array
+        scoring_functions:
           items:
-            enum:
-            - rouge
-            - bleu
             type: string
           type: array
       required:
-      - metrics
+      - input_rows
+      - candidate
+      - scoring_functions
       type: object
-    EvaluateTextGenerationRequest:
+    EvaluateResponse:
       additionalProperties: false
       properties:
-        metrics:
+        generations:
           items:
-            enum:
-            - perplexity
-            - rouge
-            - bleu
-            type: string
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
           type: array
+        scores:
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          type: object
       required:
-      - metrics
-      type: object
-    EvaluationJob:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
-      type: object
-    EvaluationJobArtifactsResponse:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
-      title: Artifacts of a evaluation job.
-      type: object
-    EvaluationJobLogStream:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
-      type: object
-    EvaluationJobStatusResponse:
-      additionalProperties: false
-      properties:
-        job_uuid:
-          type: string
-      required:
-      - job_uuid
+      - generations
+      - scores
       type: object
     FinetuningAlgorithm:
       enum:
@@ -905,6 +1080,27 @@ components:
       - bank_id
       - documents
       type: object
+    Job:
+      additionalProperties: false
+      properties:
+        job_id:
+          type: string
+      required:
+      - job_id
+      type: object
+    JobCancelRequest:
+      additionalProperties: false
+      properties:
+        job_id:
+          type: string
+      required:
+      - job_id
+      type: object
+    JobStatus:
+      enum:
+      - completed
+      - in_progress
+      type: string
     KeyValueMemoryBankDef:
       additionalProperties: false
       properties:
@@ -1220,6 +1416,24 @@ components:
       - value
       - unit
       type: object
+    ModelCandidate:
+      additionalProperties: false
+      properties:
+        model:
+          type: string
+        sampling_params:
+          $ref: '#/components/schemas/SamplingParams'
+        system_message:
+          $ref: '#/components/schemas/SystemMessage'
+        type:
+          const: model
+          default: model
+          type: string
+      required:
+      - type
+      - model
+      - sampling_params
+      type: object
     ModelDefWithProvider:
       additionalProperties: false
       properties:
@@ -1266,6 +1480,144 @@ components:
       - lr_min
       - weight_decay
       type: object
+    PaginatedRowsResult:
+      additionalProperties: false
+      properties:
+        next_page_token:
+          type: string
+        rows:
+          items:
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
+          type: array
+        total_count:
+          type: integer
+      required:
+      - rows
+      - total_count
+      type: object
+    Parameter:
+      additionalProperties: false
+      properties:
+        description:
+          type: string
+        name:
+          type: string
+        type:
+          oneOf:
+          - additionalProperties: false
+            properties:
+              type:
+                const: string
+                default: string
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: number
+                default: number
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: boolean
+                default: boolean
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: array
+                default: array
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: object
+                default: object
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: json
+                default: json
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: union
+                default: union
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: custom
+                default: custom
+                type: string
+              validator_class:
+                type: string
+            required:
+            - type
+            - validator_class
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: chat_completion_input
+                default: chat_completion_input
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: completion_input
+                default: completion_input
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: agent_turn_input
+                default: agent_turn_input
+                type: string
+            required:
+            - type
+            type: object
+      required:
+      - name
+      - type
+      type: object
     PhotogenToolDefinition:
       additionalProperties: false
       properties:
@@ -1373,7 +1725,7 @@ components:
         algorithm_config:
           $ref: '#/components/schemas/DPOAlignmentConfig'
         dataset:
-          $ref: '#/components/schemas/TrainEvalDataset'
+          type: string
         finetuned_model:
           $ref: '#/components/schemas/URL'
         hyperparam_search_config:
@@ -1403,7 +1755,7 @@ components:
         training_config:
           $ref: '#/components/schemas/TrainingConfig'
         validation_dataset:
-          $ref: '#/components/schemas/TrainEvalDataset'
+          type: string
       required:
       - job_uuid
       - finetuned_model
@@ -1515,6 +1867,14 @@ components:
       enum:
       - dpo
       type: string
+    RegisterDatasetRequest:
+      additionalProperties: false
+      properties:
+        dataset_def:
+          $ref: '#/components/schemas/DatasetDefWithProvider'
+      required:
+      - dataset_def
+      type: object
     RegisterMemoryBankRequest:
       additionalProperties: false
       properties:
@@ -1535,6 +1895,14 @@ components:
       required:
       - model
       type: object
+    RegisterScoringFunctionRequest:
+      additionalProperties: false
+      properties:
+        function_def:
+          $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+      required:
+      - function_def
+      type: object
     RegisterShieldRequest:
       additionalProperties: false
       properties:
@@ -1591,31 +1959,6 @@ components:
       - PUT
       - DELETE
       type: string
-    RewardScoreRequest:
-      additionalProperties: false
-      properties:
-        dialog_generations:
-          items:
-            $ref: '#/components/schemas/DialogGenerations'
-          type: array
-        model:
-          type: string
-      required:
-      - dialog_generations
-      - model
-      type: object
-    RewardScoringResponse:
-      additionalProperties: false
-      properties:
-        scored_generations:
-          items:
-            $ref: '#/components/schemas/ScoredDialogGenerations'
-          type: array
-      required:
-      - scored_generations
-      title: Response from the reward scoring. Batch of (prompt, response, score)
-        tuples that pass the threshold.
-      type: object
     RouteInfo:
       additionalProperties: false
       properties:
@@ -1717,39 +2060,239 @@ components:
       - top_p
       - top_k
       type: string
-    ScoredDialogGenerations:
+    ScoreBatchRequest:
       additionalProperties: false
       properties:
-        dialog:
+        dataset_id:
+          type: string
+        save_results_dataset:
+          type: boolean
+        scoring_functions:
           items:
-            oneOf:
-            - $ref: '#/components/schemas/UserMessage'
-            - $ref: '#/components/schemas/SystemMessage'
-            - $ref: '#/components/schemas/ToolResponseMessage'
-            - $ref: '#/components/schemas/CompletionMessage'
-          type: array
-        scored_generations:
-          items:
-            $ref: '#/components/schemas/ScoredMessage'
+            type: string
           type: array
       required:
-      - dialog
-      - scored_generations
+      - dataset_id
+      - scoring_functions
+      - save_results_dataset
       type: object
-    ScoredMessage:
+    ScoreBatchResponse:
       additionalProperties: false
       properties:
-        message:
-          oneOf:
-          - $ref: '#/components/schemas/UserMessage'
-          - $ref: '#/components/schemas/SystemMessage'
-          - $ref: '#/components/schemas/ToolResponseMessage'
-          - $ref: '#/components/schemas/CompletionMessage'
-        score:
-          type: number
+        dataset_id:
+          type: string
+        results:
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          type: object
       required:
-      - message
-      - score
+      - results
+      type: object
+    ScoreRequest:
+      additionalProperties: false
+      properties:
+        input_rows:
+          items:
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
+          type: array
+        scoring_functions:
+          items:
+            type: string
+          type: array
+      required:
+      - input_rows
+      - scoring_functions
+      type: object
+    ScoreResponse:
+      additionalProperties: false
+      properties:
+        results:
+          additionalProperties:
+            $ref: '#/components/schemas/ScoringResult'
+          type: object
+      required:
+      - results
+      type: object
+    ScoringFunctionDefWithProvider:
+      additionalProperties: false
+      properties:
+        context:
+          additionalProperties: false
+          properties:
+            judge_model:
+              type: string
+            prompt_template:
+              type: string
+          required:
+          - judge_model
+          type: object
+        description:
+          type: string
+        identifier:
+          type: string
+        metadata:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        parameters:
+          items:
+            $ref: '#/components/schemas/Parameter'
+          type: array
+        provider_id:
+          type: string
+        return_type:
+          oneOf:
+          - additionalProperties: false
+            properties:
+              type:
+                const: string
+                default: string
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: number
+                default: number
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: boolean
+                default: boolean
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: array
+                default: array
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: object
+                default: object
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: json
+                default: json
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: union
+                default: union
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: custom
+                default: custom
+                type: string
+              validator_class:
+                type: string
+            required:
+            - type
+            - validator_class
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: chat_completion_input
+                default: chat_completion_input
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: completion_input
+                default: completion_input
+                type: string
+            required:
+            - type
+            type: object
+          - additionalProperties: false
+            properties:
+              type:
+                const: agent_turn_input
+                default: agent_turn_input
+                type: string
+            required:
+            - type
+            type: object
+      required:
+      - identifier
+      - metadata
+      - parameters
+      - return_type
+      - provider_id
+      type: object
+    ScoringResult:
+      additionalProperties: false
+      properties:
+        aggregated_results:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        score_rows:
+          items:
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
+          type: array
+      required:
+      - score_rows
+      - aggregated_results
       type: object
     SearchToolDefinition:
       additionalProperties: false
@@ -1942,7 +2485,7 @@ components:
           - $ref: '#/components/schemas/QLoraFinetuningConfig'
           - $ref: '#/components/schemas/DoraFinetuningConfig'
         dataset:
-          $ref: '#/components/schemas/TrainEvalDataset'
+          type: string
         hyperparam_search_config:
           additionalProperties:
             oneOf:
@@ -1972,7 +2515,7 @@ components:
         training_config:
           $ref: '#/components/schemas/TrainingConfig'
         validation_dataset:
-          $ref: '#/components/schemas/TrainEvalDataset'
+          type: string
       required:
       - job_uuid
       - model
@@ -2027,7 +2570,15 @@ components:
           type: object
         synthetic_data:
           items:
-            $ref: '#/components/schemas/ScoredDialogGenerations'
+            additionalProperties:
+              oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+            type: object
           type: array
       required:
       - synthetic_data
@@ -2282,38 +2833,6 @@ components:
       - root_span_id
       - start_time
       type: object
-    TrainEvalDataset:
-      additionalProperties: false
-      properties:
-        columns:
-          additionalProperties:
-            $ref: '#/components/schemas/TrainEvalDatasetColumnType'
-          type: object
-        content_url:
-          $ref: '#/components/schemas/URL'
-        metadata:
-          additionalProperties:
-            oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          type: object
-      required:
-      - columns
-      - content_url
-      title: Dataset to be used for training or evaluating language models.
-      type: object
-    TrainEvalDatasetColumnType:
-      enum:
-      - dialog
-      - text
-      - media
-      - number
-      - json
-      type: string
     TrainingConfig:
       additionalProperties: false
       properties:
@@ -2510,7 +3029,7 @@ info:
   description: "This is the specification of the llama stack that provides\n     \
     \           a set of endpoints and their corresponding interfaces that are tailored\
     \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-10-18 20:48:17.730988"
+    \ draft and subject to change.\n                Generated at 2024-10-24 17:40:59.576117"
   title: '[DRAFT] Llama Stack Specification'
   version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2651,6 +3170,11 @@ paths:
         required: true
         schema:
           type: string
+      - in: query
+        name: session_id
+        required: true
+        schema:
+          type: string
       - in: query
         name: turn_id
         required: true
@@ -2710,6 +3234,11 @@ paths:
         required: true
         schema:
           type: string
+      - in: query
+        name: session_id
+        required: true
+        schema:
+          type: string
       - in: query
         name: turn_id
         required: true
@@ -2781,9 +3310,29 @@ paths:
           description: OK
       tags:
       - BatchInference
-  /datasets/create:
-    post:
+  /datasetio/get_rows_paginated:
+    get:
       parameters:
+      - in: query
+        name: dataset_id
+        required: true
+        schema:
+          type: string
+      - in: query
+        name: rows_in_page
+        required: true
+        schema:
+          type: integer
+      - in: query
+        name: page_token
+        required: false
+        schema:
+          type: string
+      - in: query
+        name: filter_condition
+        required: false
+        schema:
+          type: string
       - description: JSON-encoded provider data which will be made available to the
           adapter servicing the API
         in: header
@@ -2791,43 +3340,20 @@ paths:
         required: false
         schema:
           type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CreateDatasetRequest'
-        required: true
       responses:
         '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/PaginatedRowsResult'
           description: OK
       tags:
-      - Datasets
-  /datasets/delete:
-    post:
-      parameters:
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/DeleteDatasetRequest'
-        required: true
-      responses:
-        '200':
-          description: OK
-      tags:
-      - Datasets
+      - DatasetIO
   /datasets/get:
     get:
       parameters:
       - in: query
-        name: dataset_uuid
+        name: dataset_identifier
         required: true
         schema:
           type: string
@@ -2843,104 +3369,13 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/TrainEvalDataset'
+                oneOf:
+                - $ref: '#/components/schemas/DatasetDefWithProvider'
+                - type: 'null'
           description: OK
       tags:
       - Datasets
-  /evaluate/job/artifacts:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EvaluationJobArtifactsResponse'
-          description: OK
-      tags:
-      - Evaluations
-  /evaluate/job/cancel:
-    post:
-      parameters:
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CancelEvaluationJobRequest'
-        required: true
-      responses:
-        '200':
-          description: OK
-      tags:
-      - Evaluations
-  /evaluate/job/logs:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EvaluationJobLogStream'
-          description: OK
-      tags:
-      - Evaluations
-  /evaluate/job/status:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EvaluationJobStatusResponse'
-          description: OK
-      tags:
-      - Evaluations
-  /evaluate/jobs:
+  /datasets/list:
     get:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -2955,11 +3390,11 @@ paths:
           content:
             application/jsonl:
               schema:
-                $ref: '#/components/schemas/EvaluationJob'
+                $ref: '#/components/schemas/DatasetDefWithProvider'
           description: OK
       tags:
-      - Evaluations
-  /evaluate/question_answering/:
+      - Datasets
+  /datasets/register:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -2973,18 +3408,14 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest'
+              $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
       responses:
         '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/EvaluationJob'
           description: OK
       tags:
-      - Evaluations
-  /evaluate/summarization/:
+      - Datasets
+  /eval/evaluate:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -2998,18 +3429,18 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/EvaluateSummarizationRequest'
+              $ref: '#/components/schemas/EvaluateRequest'
         required: true
       responses:
         '200':
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/EvaluationJob'
+                $ref: '#/components/schemas/EvaluateResponse'
           description: OK
       tags:
-      - Evaluations
-  /evaluate/text_generation/:
+      - Eval
+  /eval/evaluate_batch:
     post:
       parameters:
       - description: JSON-encoded provider data which will be made available to the
@@ -3023,17 +3454,88 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/EvaluateTextGenerationRequest'
+              $ref: '#/components/schemas/EvaluateBatchRequest'
         required: true
       responses:
         '200':
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/EvaluationJob'
+                $ref: '#/components/schemas/Job'
           description: OK
       tags:
-      - Evaluations
+      - Eval
+  /eval/job/cancel:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/JobCancelRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      tags:
+      - Eval
+  /eval/job/result:
+    get:
+      parameters:
+      - in: query
+        name: job_id
+        required: true
+        schema:
+          type: string
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/EvaluateResponse'
+          description: OK
+      tags:
+      - Eval
+  /eval/job/status:
+    get:
+      parameters:
+      - in: query
+        name: job_id
+        required: true
+        schema:
+          type: string
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/JobStatus'
+                - type: 'null'
+          description: OK
+      tags:
+      - Eval
   /health:
     get:
       parameters:
@@ -3501,31 +4003,6 @@ paths:
           description: OK
       tags:
       - Inspect
-  /reward_scoring/score:
-    post:
-      parameters:
-      - description: JSON-encoded provider data which will be made available to the
-          adapter servicing the API
-        in: header
-        name: X-LlamaStack-ProviderData
-        required: false
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/RewardScoreRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RewardScoringResponse'
-          description: OK
-      tags:
-      - RewardScoring
   /routes/list:
     get:
       parameters:
@@ -3574,6 +4051,122 @@ paths:
           description: OK
       tags:
       - Safety
+  /scoring/score:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ScoreRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ScoreResponse'
+          description: OK
+      tags:
+      - Scoring
+  /scoring/score_batch:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ScoreBatchRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ScoreBatchResponse'
+          description: OK
+      tags:
+      - Scoring
+  /scoring_functions/get:
+    get:
+      parameters:
+      - in: query
+        name: name
+        required: true
+        schema:
+          type: string
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+                - type: 'null'
+          description: OK
+      tags:
+      - ScoringFunctions
+  /scoring_functions/list:
+    get:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+          description: OK
+      tags:
+      - ScoringFunctions
+  /scoring_functions/register:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      tags:
+      - ScoringFunctions
   /shields/get:
     get:
       parameters:
@@ -3715,21 +4308,23 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
-- name: Models
-- name: RewardScoring
-- name: MemoryBanks
-- name: Shields
+- name: Eval
+- name: ScoringFunctions
 - name: SyntheticDataGeneration
-- name: Inference
 - name: Inspect
-- name: BatchInference
-- name: Memory
-- name: Datasets
-- name: Agents
 - name: PostTraining
-- name: Telemetry
+- name: Models
 - name: Safety
-- name: Evaluations
+- name: MemoryBanks
+- name: DatasetIO
+- name: Memory
+- name: Scoring
+- name: Shields
+- name: Datasets
+- name: Inference
+- name: Telemetry
+- name: BatchInference
+- name: Agents
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
   name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
@@ -3785,9 +4380,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionResponse"
     />
   name: BatchCompletionResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/CancelEvaluationJobRequest"
-    />
-  name: CancelEvaluationJobRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/CancelTrainingJobRequest"
     />
   name: CancelTrainingJobRequest
@@ -3922,43 +4514,35 @@ tags:
   name: Turn
 - description: <SchemaDefinition schemaRef="#/components/schemas/ViolationLevel" />
   name: ViolationLevel
-- description: 'Dataset to be used for training or evaluating language models.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/TrainEvalDataset" />'
-  name: TrainEvalDataset
-- description: <SchemaDefinition schemaRef="#/components/schemas/TrainEvalDatasetColumnType"
-    />
-  name: TrainEvalDatasetColumnType
-- description: <SchemaDefinition schemaRef="#/components/schemas/CreateDatasetRequest"
-    />
-  name: CreateDatasetRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsRequest"
     />
   name: DeleteAgentsRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsSessionRequest"
     />
   name: DeleteAgentsSessionRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteDatasetRequest"
-    />
-  name: DeleteDatasetRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsRequest"
     />
   name: EmbeddingsRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsResponse"
     />
   name: EmbeddingsResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateQuestionAnsweringRequest"
+- description: <SchemaDefinition schemaRef="#/components/schemas/AgentCandidate" />
+  name: AgentCandidate
+- description: <SchemaDefinition schemaRef="#/components/schemas/ModelCandidate" />
+  name: ModelCandidate
+- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateRequest"
     />
-  name: EvaluateQuestionAnsweringRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJob" />
-  name: EvaluationJob
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateSummarizationRequest"
+  name: EvaluateRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateResponse"
     />
-  name: EvaluateSummarizationRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateTextGenerationRequest"
+  name: EvaluateResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoringResult" />
+  name: ScoringResult
+- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluateBatchRequest"
     />
-  name: EvaluateTextGenerationRequest
+  name: EvaluateBatchRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/Job" />
+  name: Job
 - description: <SchemaDefinition schemaRef="#/components/schemas/GetAgentsSessionRequest"
     />
   name: GetAgentsSessionRequest
@@ -3982,21 +4566,20 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgentStepResponse"
     />
   name: AgentStepResponse
-- description: 'Artifacts of a evaluation job.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/EvaluationJobArtifactsResponse"
-    />'
-  name: EvaluationJobArtifactsResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJobLogStream"
+- description: <SchemaDefinition schemaRef="#/components/schemas/DatasetDefWithProvider"
     />
-  name: EvaluationJobLogStream
-- description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJobStatusResponse"
-    />
-  name: EvaluationJobStatusResponse
+  name: DatasetDefWithProvider
 - description: <SchemaDefinition schemaRef="#/components/schemas/ModelDefWithProvider"
     />
   name: ModelDefWithProvider
+- description: <SchemaDefinition schemaRef="#/components/schemas/PaginatedRowsResult"
+    />
+  name: PaginatedRowsResult
+- description: <SchemaDefinition schemaRef="#/components/schemas/Parameter" />
+  name: Parameter
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoringFunctionDefWithProvider"
+    />
+  name: ScoringFunctionDefWithProvider
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefWithProvider"
     />
   name: ShieldDefWithProvider
@@ -4038,6 +4621,11 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/InsertDocumentsRequest"
     />
   name: InsertDocumentsRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/JobCancelRequest"
+    />
+  name: JobCancelRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/JobStatus" />
+  name: JobStatus
 - description: <SchemaDefinition schemaRef="#/components/schemas/ProviderInfo" />
   name: ProviderInfo
 - description: <SchemaDefinition schemaRef="#/components/schemas/RouteInfo" />
@@ -4081,38 +4669,37 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QueryDocumentsResponse"
     />
   name: QueryDocumentsResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/RegisterDatasetRequest"
+    />
+  name: RegisterDatasetRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/RegisterMemoryBankRequest"
     />
   name: RegisterMemoryBankRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/RegisterModelRequest"
     />
   name: RegisterModelRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/RegisterScoringFunctionRequest"
+    />
+  name: RegisterScoringFunctionRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/RegisterShieldRequest"
     />
   name: RegisterShieldRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/DialogGenerations"
-    />
-  name: DialogGenerations
-- description: <SchemaDefinition schemaRef="#/components/schemas/RewardScoreRequest"
-    />
-  name: RewardScoreRequest
-- description: 'Response from the reward scoring. Batch of (prompt, response, score)
-    tuples that pass the threshold.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
-  name: RewardScoringResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/ScoredDialogGenerations"
-    />
-  name: ScoredDialogGenerations
-- description: <SchemaDefinition schemaRef="#/components/schemas/ScoredMessage" />
-  name: ScoredMessage
 - description: <SchemaDefinition schemaRef="#/components/schemas/RunShieldRequest"
     />
   name: RunShieldRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/RunShieldResponse"
     />
   name: RunShieldResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoreRequest" />
+  name: ScoreRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoreResponse" />
+  name: ScoreResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoreBatchRequest"
+    />
+  name: ScoreBatchRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/ScoreBatchResponse"
+    />
+  name: ScoreBatchResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
     />
   name: DoraFinetuningConfig
@@ -4143,21 +4730,24 @@ x-tagGroups:
   tags:
   - Agents
   - BatchInference
+  - DatasetIO
   - Datasets
-  - Evaluations
+  - Eval
   - Inference
   - Inspect
   - Memory
   - MemoryBanks
   - Models
   - PostTraining
-  - RewardScoring
   - Safety
+  - Scoring
+  - ScoringFunctions
   - Shields
   - SyntheticDataGeneration
   - Telemetry
 - name: Types
   tags:
+  - AgentCandidate
   - AgentConfig
   - AgentCreateResponse
   - AgentSessionCreateResponse
@@ -4175,7 +4765,6 @@ x-tagGroups:
   - BatchCompletionRequest
   - BatchCompletionResponse
   - BuiltinTool
-  - CancelEvaluationJobRequest
   - CancelTrainingJobRequest
   - ChatCompletionRequest
   - ChatCompletionResponse
@@ -4191,22 +4780,16 @@ x-tagGroups:
   - CreateAgentRequest
   - CreateAgentSessionRequest
   - CreateAgentTurnRequest
-  - CreateDatasetRequest
   - DPOAlignmentConfig
+  - DatasetDefWithProvider
   - DeleteAgentsRequest
   - DeleteAgentsSessionRequest
-  - DeleteDatasetRequest
-  - DialogGenerations
   - DoraFinetuningConfig
   - EmbeddingsRequest
   - EmbeddingsResponse
-  - EvaluateQuestionAnsweringRequest
-  - EvaluateSummarizationRequest
-  - EvaluateTextGenerationRequest
-  - EvaluationJob
-  - EvaluationJobArtifactsResponse
-  - EvaluationJobLogStream
-  - EvaluationJobStatusResponse
+  - EvaluateBatchRequest
+  - EvaluateRequest
+  - EvaluateResponse
   - FinetuningAlgorithm
   - FunctionCallToolDefinition
   - GetAgentsSessionRequest
@@ -4215,6 +4798,9 @@ x-tagGroups:
   - ImageMedia
   - InferenceStep
   - InsertDocumentsRequest
+  - Job
+  - JobCancelRequest
+  - JobStatus
   - KeyValueMemoryBankDef
   - KeywordMemoryBankDef
   - LogEventRequest
@@ -4224,8 +4810,11 @@ x-tagGroups:
   - MemoryRetrievalStep
   - MemoryToolDefinition
   - MetricEvent
+  - ModelCandidate
   - ModelDefWithProvider
   - OptimizerConfig
+  - PaginatedRowsResult
+  - Parameter
   - PhotogenToolDefinition
   - PostTrainingJob
   - PostTrainingJobArtifactsResponse
@@ -4238,21 +4827,25 @@ x-tagGroups:
   - QueryDocumentsRequest
   - QueryDocumentsResponse
   - RLHFAlgorithm
+  - RegisterDatasetRequest
   - RegisterMemoryBankRequest
   - RegisterModelRequest
+  - RegisterScoringFunctionRequest
   - RegisterShieldRequest
   - RestAPIExecutionConfig
   - RestAPIMethod
-  - RewardScoreRequest
-  - RewardScoringResponse
   - RouteInfo
   - RunShieldRequest
   - RunShieldResponse
   - SafetyViolation
   - SamplingParams
   - SamplingStrategy
-  - ScoredDialogGenerations
-  - ScoredMessage
+  - ScoreBatchRequest
+  - ScoreBatchResponse
+  - ScoreRequest
+  - ScoreResponse
+  - ScoringFunctionDefWithProvider
+  - ScoringResult
   - SearchToolDefinition
   - Session
   - ShieldCallStep
@@ -4278,8 +4871,6 @@ x-tagGroups:
   - ToolResponse
   - ToolResponseMessage
   - Trace
-  - TrainEvalDataset
-  - TrainEvalDatasetColumnType
   - TrainingConfig
   - Turn
   - URL
diff --git a/llama_stack/apis/common/job_types.py b/llama_stack/apis/common/job_types.py
index ab203ebb8..ab8ab22dc 100644
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@@ -3,6 +3,8 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from enum import Enum
+
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel
 
@@ -10,3 +12,9 @@ from pydantic import BaseModel
 @json_schema_type
 class Job(BaseModel):
     job_id: str
+
+
+@json_schema_type
+class JobStatus(Enum):  # execution status values reported for a Job
+    completed = "completed"
+    in_progress = "in_progress"
diff --git a/llama_stack/apis/common/type_system.py b/llama_stack/apis/common/type_system.py
index 35a26e9ef..93a3c0339 100644
--- a/llama_stack/apis/common/type_system.py
+++ b/llama_stack/apis/common/type_system.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Dict, List, Literal, Union
+from typing import Literal, Union
 
 from pydantic import BaseModel, Field
 from typing_extensions import Annotated
@@ -24,12 +24,10 @@ class BooleanType(BaseModel):
 
 class ArrayType(BaseModel):
     type: Literal["array"] = "array"
-    items: "ParamType"
 
 
 class ObjectType(BaseModel):
     type: Literal["object"] = "object"
-    properties: Dict[str, "ParamType"] = Field(default_factory=dict)
 
 
 class JsonType(BaseModel):
@@ -38,12 +36,21 @@ class JsonType(BaseModel):
 
 class UnionType(BaseModel):
     type: Literal["union"] = "union"
-    options: List["ParamType"] = Field(default_factory=list)
 
 
-class CustomType(BaseModel):
-    type: Literal["custom"] = "custom"
-    validator_class: str
+class ChatCompletionInputType(BaseModel):
+    # expects List[Message] for messages
+    type: Literal["chat_completion_input"] = "chat_completion_input"
+
+
+class CompletionInputType(BaseModel):
+    # expects InterleavedTextMedia for content
+    type: Literal["completion_input"] = "completion_input"
+
+
+class AgentTurnInputType(BaseModel):
+    # expects List[Message] for messages (may also include attachments?)
+    type: Literal["agent_turn_input"] = "agent_turn_input"
 
 
 ParamType = Annotated[
@@ -55,11 +62,22 @@ ParamType = Annotated[
         ObjectType,
         JsonType,
         UnionType,
-        CustomType,
+        ChatCompletionInputType,
+        CompletionInputType,
+        AgentTurnInputType,
     ],
     Field(discriminator="type"),
 ]
 
-ArrayType.model_rebuild()
-ObjectType.model_rebuild()
-UnionType.model_rebuild()
+# TODO: defining ParamType recursively inside these containers causes
+# infinite recursion in the OpenAPI generation script. Since we are going
+# with ChatCompletionInputType and CompletionInputType for now, nested
+# ArrayType/ObjectType/UnionType are not needed and the rebuilds are disabled.
+# ArrayType.model_rebuild()
+# ObjectType.model_rebuild()
+# UnionType.model_rebuild()
+
+
+# class CustomType(BaseModel):
+#     type: Literal["custom"] = "custom"
+#     validator_class: str
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index a97af1fc0..51f49da15 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -12,7 +12,7 @@ from llama_models.llama3.api.datatypes import *  # noqa: F403
 from llama_models.schema_utils import json_schema_type, webmethod
 from llama_stack.apis.scoring_functions import *  # noqa: F403
 from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import Job
+from llama_stack.apis.common.job_types import Job, JobStatus
 from llama_stack.apis.scoring import *  # noqa: F403
 
 
@@ -40,7 +40,7 @@ class EvaluateResponse(BaseModel):
     generations: List[Dict[str, Any]]
 
     # each key in the dict is a scoring function name
-    scores: List[Dict[str, ScoringResult]]
+    scores: Dict[str, ScoringResult]
 
 
 class Eval(Protocol):
@@ -61,10 +61,10 @@ class Eval(Protocol):
     ) -> EvaluateResponse: ...
 
     @webmethod(route="/eval/job/status", method="GET")
-    async def job_status(self, job_id: str) -> None: ...
+    async def job_status(self, job_id: str) -> Optional[JobStatus]: ...
 
     @webmethod(route="/eval/job/cancel", method="POST")
     async def job_cancel(self, job_id: str) -> None: ...
 
     @webmethod(route="/eval/job/result", method="GET")
-    async def job_result(self, job_id: str) -> None: ...
+    async def job_result(self, job_id: str) -> EvaluateResponse: ...
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index d943f48b2..eb4992cc6 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -14,7 +14,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
 from pydantic import BaseModel, Field
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.dataset import *  # noqa: F403
+from llama_stack.apis.datasets import *  # noqa: F403
 from llama_stack.apis.common.training_types import *  # noqa: F403
 
 
@@ -107,8 +107,8 @@ class PostTrainingSFTRequest(BaseModel):
     job_uuid: str
 
     model: str
-    dataset: TrainEvalDataset
-    validation_dataset: TrainEvalDataset
+    dataset_id: str
+    validation_dataset_id: str
 
     algorithm: FinetuningAlgorithm
     algorithm_config: Union[
@@ -131,8 +131,8 @@ class PostTrainingRLHFRequest(BaseModel):
 
     finetuned_model: URL
 
-    dataset: TrainEvalDataset
-    validation_dataset: TrainEvalDataset
+    dataset_id: str
+    validation_dataset_id: str
 
     algorithm: RLHFAlgorithm
     algorithm_config: Union[DPOAlignmentConfig]
@@ -181,8 +181,8 @@ class PostTraining(Protocol):
         self,
         job_uuid: str,
         model: str,
-        dataset: TrainEvalDataset,
-        validation_dataset: TrainEvalDataset,
+        dataset_id: str,
+        validation_dataset_id: str,
         algorithm: FinetuningAlgorithm,
         algorithm_config: Union[
             LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig
@@ -198,8 +198,8 @@ class PostTraining(Protocol):
         self,
         job_uuid: str,
         finetuned_model: URL,
-        dataset: TrainEvalDataset,
-        validation_dataset: TrainEvalDataset,
+        dataset_id: str,
+        validation_dataset_id: str,
         algorithm: RLHFAlgorithm,
         algorithm_config: Union[DPOAlignmentConfig],
         optimizer_config: OptimizerConfig,
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index adac34d55..1fd523dcb 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -37,7 +37,7 @@ class ScoreResponse(BaseModel):
 
 
 class ScoringFunctionStore(Protocol):
-    def get_scoring_function(self, name: str) -> ScoringFunctionDefWithProvider: ...
+    def get_scoring_function(self, name: str) -> ScoringFnDefWithProvider: ...
 
 
 @runtime_checkable
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index a242215c6..fc3584f90 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -29,7 +29,7 @@ class LLMAsJudgeContext(BaseModel):
 
 
 @json_schema_type
-class ScoringFunctionDef(BaseModel):
+class ScoringFnDef(BaseModel):
     identifier: str
     description: Optional[str] = None
     metadata: Dict[str, Any] = Field(
@@ -48,7 +48,7 @@ class ScoringFunctionDef(BaseModel):
 
 
 @json_schema_type
-class ScoringFunctionDefWithProvider(ScoringFunctionDef):
+class ScoringFnDefWithProvider(ScoringFnDef):
     provider_id: str = Field(
         description="ID of the provider which serves this dataset",
     )
@@ -57,14 +57,14 @@ class ScoringFunctionDefWithProvider(ScoringFunctionDef):
 @runtime_checkable
 class ScoringFunctions(Protocol):
     @webmethod(route="/scoring_functions/list", method="GET")
-    async def list_scoring_functions(self) -> List[ScoringFunctionDefWithProvider]: ...
+    async def list_scoring_functions(self) -> List[ScoringFnDefWithProvider]: ...
 
     @webmethod(route="/scoring_functions/get", method="GET")
     async def get_scoring_function(
         self, name: str
-    ) -> Optional[ScoringFunctionDefWithProvider]: ...
+    ) -> Optional[ScoringFnDefWithProvider]: ...
 
     @webmethod(route="/scoring_functions/register", method="POST")
     async def register_scoring_function(
-        self, function_def: ScoringFunctionDefWithProvider
+        self, function_def: ScoringFnDefWithProvider
     ) -> None: ...
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
index 60c756128..05b49036d 100644
--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -13,7 +13,6 @@ from llama_models.schema_utils import json_schema_type, webmethod
 from pydantic import BaseModel
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_stack.apis.reward_scoring import *  # noqa: F403
 
 
 class FilteringFunction(Enum):
@@ -40,7 +39,7 @@ class SyntheticDataGenerationRequest(BaseModel):
 class SyntheticDataGenerationResponse(BaseModel):
     """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
 
-    synthetic_data: List[ScoredDialogGenerations]
+    synthetic_data: List[Dict[str, Any]]
     statistics: Optional[Dict[str, Any]] = None
 
 
diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py
index 318809baf..9ad82cd79 100644
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@@ -34,7 +34,7 @@ RoutableObject = Union[
     ShieldDef,
     MemoryBankDef,
     DatasetDef,
-    ScoringFunctionDef,
+    ScoringFnDef,
 ]
 
 RoutableObjectWithProvider = Union[
@@ -42,7 +42,7 @@ RoutableObjectWithProvider = Union[
     ShieldDefWithProvider,
     MemoryBankDefWithProvider,
     DatasetDefWithProvider,
-    ScoringFunctionDefWithProvider,
+    ScoringFnDefWithProvider,
 ]
 
 RoutedProtocol = Union[
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index b9b9fb229..cfe31a21d 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -14,6 +14,7 @@ from llama_stack.distribution.datatypes import *  # noqa: F403
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
+from llama_stack.apis.eval import Eval
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.memory import Memory
@@ -46,6 +47,7 @@ def api_protocol_map() -> Dict[Api, Any]:
         Api.datasetio: DatasetIO,
         Api.scoring_functions: ScoringFunctions,
         Api.scoring: Scoring,
+        Api.eval: Eval,
     }
 
 
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index dcd588a9e..3e07b9162 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -100,7 +100,7 @@ class CommonRoutingTableImpl(RoutingTable):
                 scoring_functions = await p.list_scoring_functions()
                 add_objects(
                     [
-                        ScoringFunctionDefWithProvider(**s.dict(), provider_id=pid)
+                        ScoringFnDefWithProvider(**s.dict(), provider_id=pid)
                         for s in scoring_functions
                     ]
                 )
@@ -239,7 +239,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
 
 
 class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, Scoring):
-    async def list_scoring_functions(self) -> List[ScoringFunctionDefWithProvider]:
+    async def list_scoring_functions(self) -> List[ScoringFnDefWithProvider]:
         objects = []
         for objs in self.registry.values():
             objects.extend(objs)
@@ -247,10 +247,10 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, Scoring):
 
     async def get_scoring_function(
         self, name: str
-    ) -> Optional[ScoringFunctionDefWithProvider]:
+    ) -> Optional[ScoringFnDefWithProvider]:
         return self.get_object_by_identifier(name)
 
     async def register_scoring_function(
-        self, function_def: ScoringFunctionDefWithProvider
+        self, function_def: ScoringFnDefWithProvider
     ) -> None:
         await self.register_object(function_def)
diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py
index 903ff5438..eace0ea1a 100644
--- a/llama_stack/providers/datatypes.py
+++ b/llama_stack/providers/datatypes.py
@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.datasets import DatasetDef
 from llama_stack.apis.memory_banks import MemoryBankDef
 from llama_stack.apis.models import ModelDef
-from llama_stack.apis.scoring_functions import ScoringFunctionDef
+from llama_stack.apis.scoring_functions import ScoringFnDef
 from llama_stack.apis.shields import ShieldDef
 
 
@@ -25,6 +25,7 @@ class Api(Enum):
     memory = "memory"
     datasetio = "datasetio"
     scoring = "scoring"
+    eval = "eval"
 
     telemetry = "telemetry"
 
@@ -63,11 +64,9 @@ class DatasetsProtocolPrivate(Protocol):
 
 
 class ScoringFunctionsProtocolPrivate(Protocol):
-    async def list_scoring_functions(self) -> List[ScoringFunctionDef]: ...
+    async def list_scoring_functions(self) -> List[ScoringFnDef]: ...
 
-    async def register_scoring_function(
-        self, function_def: ScoringFunctionDef
-    ) -> None: ...
+    async def register_scoring_function(self, function_def: ScoringFnDef) -> None: ...
 
 
 @json_schema_type
diff --git a/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py b/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
index 43664f394..a96d9bcab 100644
--- a/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
+++ b/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
@@ -143,11 +143,12 @@ class MetaReferenceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         else:
             next_page_token = int(page_token)
 
-        if rows_in_page == -1:
-            rows = dataset_info.dataset_impl[next_page_token:]
-
         start = next_page_token
-        end = min(start + rows_in_page, len(dataset_info.dataset_impl))
+        if rows_in_page == -1:
+            end = len(dataset_info.dataset_impl)
+        else:
+            end = min(start + rows_in_page, len(dataset_info.dataset_impl))
+
         rows = dataset_info.dataset_impl[start:end]
 
         return PaginatedRowsResult(
diff --git a/llama_stack/providers/impls/meta_reference/eval/__init__.py b/llama_stack/providers/impls/meta_reference/eval/__init__.py
new file mode 100644
index 000000000..fb285c668
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/__init__.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Dict
+
+from llama_stack.distribution.datatypes import Api, ProviderSpec
+
+from .config import MetaReferenceEvalConfig
+
+
+async def get_provider_impl(
+    config: MetaReferenceEvalConfig,
+    deps: Dict[Api, ProviderSpec],
+):
+    """Build the meta-reference eval provider from its deps and initialize it."""
+    # Deferred import: the implementation module is only loaded when this runs.
+    from .eval import MetaReferenceEvalImpl
+
+    # Order follows the positional arguments passed to MetaReferenceEvalImpl.
+    required = (Api.datasetio, Api.datasets, Api.scoring, Api.inference)
+    dep_impls = [deps[api] for api in required]
+
+    provider = MetaReferenceEvalImpl(config, *dep_impls)
+    await provider.initialize()
+    return provider
diff --git a/llama_stack/providers/impls/meta_reference/eval/config.py b/llama_stack/providers/impls/meta_reference/eval/config.py
new file mode 100644
index 000000000..1892da2a2
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/config.py
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.eval import *  # noqa: F401, F403
+
+
+class MetaReferenceEvalConfig(BaseModel): ...  # placeholder: declares no fields yet
diff --git a/llama_stack/providers/impls/meta_reference/eval/eval.py b/llama_stack/providers/impls/meta_reference/eval/eval.py
new file mode 100644
index 000000000..d675e40eb
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/eval.py
@@ -0,0 +1,167 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from enum import Enum
+from llama_models.llama3.api.datatypes import *  # noqa: F403
+
+from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.apis.common.job_types import Job
+from llama_stack.apis.datasetio import DatasetIO
+from llama_stack.apis.datasets import Datasets
+from llama_stack.apis.eval import Eval, EvalCandidate, EvaluateResponse, JobStatus
+from llama_stack.apis.inference import Inference
+from llama_stack.apis.scoring import Scoring
+
+from .config import MetaReferenceEvalConfig
+
+
+class ColumnName(Enum):  # dataset/row column names used by the eval provider
+    expected_answer = "expected_answer"  # required in both accepted input schemas
+    chat_completion_input = "chat_completion_input"  # rows sent to chat_completion
+    completion_input = "completion_input"  # rows sent to completion
+    generated_answer = "generated_answer"  # key under which model output is stored
+
+
+class MetaReferenceEvalImpl(Eval):
+    """Eval provider that generates answers via inference and then scores them."""
+
+    def __init__(
+        self,
+        config: MetaReferenceEvalConfig,
+        datasetio_api: DatasetIO,
+        datasets_api: Datasets,
+        scoring_api: Scoring,
+        inference_api: Inference,
+    ) -> None:
+        self.config = config
+        self.datasetio_api = datasetio_api
+        self.datasets_api = datasets_api
+        self.scoring_api = scoring_api
+        self.inference_api = inference_api
+
+        # TODO: assume sync job, will need jobs API for async scheduling
+        self.jobs = {}
+
+    async def initialize(self) -> None: ...
+
+    async def shutdown(self) -> None: ...
+
+    @staticmethod
+    def _parse_literal(value: Any) -> Any:
+        """Decode a stringified Python literal from a dataset cell."""
+        import ast  # function-scope: the module import block is left untouched
+
+        # literal_eval, unlike eval(), never executes code found in dataset content.
+        return ast.literal_eval(str(value))
+
+    async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None:
+        """Raise ValueError unless the dataset's schema is a supported eval input."""
+        dataset_def = await self.datasets_api.get_dataset(dataset_identifier=dataset_id)
+        if not dataset_def.dataset_schema:
+            raise ValueError(f"Dataset {dataset_id} does not have a schema defined.")
+
+        expected_schemas = [
+            {
+                ColumnName.expected_answer.value: StringType(),
+                ColumnName.chat_completion_input.value: ChatCompletionInputType(),
+            },
+            {
+                ColumnName.expected_answer.value: StringType(),
+                ColumnName.completion_input.value: CompletionInputType(),
+            },
+        ]
+        if dataset_def.dataset_schema not in expected_schemas:
+            raise ValueError(
+                f"Dataset {dataset_id} does not have a correct input schema in {expected_schemas}"
+            )
+
+    async def evaluate_batch(
+        self,
+        dataset_id: str,
+        candidate: EvalCandidate,
+        scoring_functions: List[str],
+    ) -> Job:
+        """Evaluate every row of a dataset and return a handle to the stored result."""
+        await self.validate_eval_input_dataset_schema(dataset_id=dataset_id)
+        all_rows = await self.datasetio_api.get_rows_paginated(
+            dataset_id=dataset_id,
+            rows_in_page=-1,
+        )
+        res = await self.evaluate(
+            input_rows=all_rows.rows,
+            candidate=candidate,
+            scoring_functions=scoring_functions,
+        )
+
+        # TODO: currently needs to wait for generation before returning
+        # need job scheduler queue (ray/celery) w/ jobs api
+        job_id = str(len(self.jobs))
+        self.jobs[job_id] = res
+        return Job(job_id=job_id)
+
+    async def evaluate(
+        self,
+        input_rows: List[Dict[str, Any]],
+        candidate: EvalCandidate,
+        scoring_functions: List[str],
+    ) -> EvaluateResponse:
+        """Generate an answer per row with the candidate model, then score the rows."""
+        if candidate.type == "agent":
+            raise NotImplementedError(
+                "Evaluation with generation has not been implemented for agents"
+            )
+        assert (
+            candidate.sampling_params.max_tokens is not None
+        ), "SamplingParams.max_tokens must be provided"
+
+        generations = []
+        for row in input_rows:
+            if ColumnName.completion_input.value in row:
+                response = await self.inference_api.completion(
+                    model=candidate.model,
+                    content=self._parse_literal(row[ColumnName.completion_input.value]),
+                    sampling_params=candidate.sampling_params,
+                )
+            elif ColumnName.chat_completion_input.value in row:
+                raw = self._parse_literal(row[ColumnName.chat_completion_input.value])
+                messages = [candidate.system_message] if candidate.system_message else []
+                messages += [UserMessage(**message) for message in raw]
+                response = await self.inference_api.chat_completion(
+                    model=candidate.model,
+                    messages=messages,
+                    sampling_params=candidate.sampling_params,
+                )
+            else:
+                raise ValueError("Invalid input row")
+            generations.append(
+                {ColumnName.generated_answer.value: response.completion_message.content}
+            )
+
+        # scoring with generated_answer
+        score_input_rows = [
+            input_r | generated_r
+            for input_r, generated_r in zip(input_rows, generations)
+        ]
+        score_response = await self.scoring_api.score(
+            input_rows=score_input_rows, scoring_functions=scoring_functions
+        )
+        return EvaluateResponse(generations=generations, scores=score_response.results)
+
+    async def job_status(self, job_id: str) -> Optional[JobStatus]:
+        """Return JobStatus.completed for a stored job id, or None if it is unknown."""
+        if job_id in self.jobs:
+            return JobStatus.completed
+        return None
+
+    async def job_cancel(self, job_id: str) -> None:
+        raise NotImplementedError("Job cancel is not implemented yet")
+
+    async def job_result(self, job_id: str) -> EvaluateResponse:
+        """Return the stored result; raise ValueError for unknown or unfinished jobs."""
+        status = await self.job_status(job_id)
+        if status != JobStatus.completed:
+            status_name = None if status is None else status.value
+            raise ValueError(f"Job is not completed, Status: {status_name}")
+        return self.jobs[job_id]
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring.py b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
index 0d32c8195..b1d561533 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scoring.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
@@ -13,17 +13,22 @@ from llama_stack.apis.datasetio import *  # noqa: F403
 from llama_stack.apis.datasets import *  # noqa: F403
 
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scorer.equality_scorer import (
-    EqualityScorer,
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
+    EqualityScoringFn,
+)
+
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
+    SubsetOfScoringFn,
 )
 
 from .config import MetaReferenceScoringConfig
 
-SUPPORTED_SCORERS = [
-    EqualityScorer,
+SUPPORTED_SCORING_FNS = [
+    EqualityScoringFn,
+    SubsetOfScoringFn,
 ]
 
-SCORER_REGISTRY = {x.scoring_function_def.identifier: x for x in SUPPORTED_SCORERS}
+SCORER_REGISTRY = {x.scoring_function_def.identifier: x for x in SUPPORTED_SCORING_FNS}
 
 
 class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
@@ -41,10 +46,10 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
 
     async def shutdown(self) -> None: ...
 
-    async def list_scoring_functions(self) -> List[ScoringFunctionDef]:
-        return [x.scoring_function_def for x in SUPPORTED_SCORERS]
+    async def list_scoring_functions(self) -> List[ScoringFnDef]:
+        return [x.scoring_function_def for x in SUPPORTED_SCORING_FNS]
 
-    async def register_scoring_function(self, function_def: ScoringFunctionDef) -> None:
+    async def register_scoring_function(self, function_def: ScoringFnDef) -> None:
         raise NotImplementedError(
             "Dynamically registering scoring functions is not supported"
         )
@@ -96,9 +101,9 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
         for scoring_fn_id in scoring_functions:
             if scoring_fn_id not in SCORER_REGISTRY:
                 raise ValueError(f"Scoring function {scoring_fn_id} is not supported.")
-            scorer = SCORER_REGISTRY[scoring_fn_id]()
-            score_results = scorer.score(input_rows)
-            agg_results = scorer.aggregate(score_results)
+            scoring_fn = SCORER_REGISTRY[scoring_fn_id]()
+            score_results = scoring_fn.score(input_rows)
+            agg_results = scoring_fn.aggregate(score_results)
             res[scoring_fn_id] = ScoringResult(
                 score_rows=score_results,
                 aggregated_results=agg_results,
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/__init__.py
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
similarity index 81%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
index ea8a3f063..952d46bb2 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
@@ -9,15 +9,15 @@ from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 
 
-class BaseScorer(ABC):
+class BaseScoringFn(ABC):
     """
-    Base interface class for all meta-reference scorers.
-    Each scorer needs to implement the following methods:
+    Base interface class for all meta-reference scoring_fns.
+    Each scoring_fn needs to implement the following methods:
     - score_row(self, row)
-    - aggregate(self, scorer_results)
+    - aggregate(self, scoring_fn_results)
     """
 
-    scoring_function_def: ScoringFunctionDef
+    scoring_function_def: ScoringFnDef
 
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(*args, **kwargs)
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py
new file mode 100644
index 000000000..52eabea2e
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any, Dict, List
+
+from llama_stack.apis.scoring import ScoringResultRow
+
+
+def aggregate_accuracy(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
+    """Summarize per-row "score" values as accuracy, num_correct and num_total."""
+    num_total = len(scoring_results)
+    if num_total == 0:
+        # Guard the division below; an empty batch has no defined accuracy.
+        raise ValueError("Empty scoring results provided.")
+    num_correct = sum(result["score"] for result in scoring_results)
+    accuracy = num_correct / num_total
+    return {"accuracy": accuracy, "num_correct": num_correct, "num_total": num_total}
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
similarity index 65%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
index ce765bfb5..cce0f948a 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
@@ -4,20 +4,23 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.impls.meta_reference.scoring.scorer.base_scorer import (
-    BaseScorer,
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+    BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+    aggregate_accuracy,
+)
 
 
-class EqualityScorer(BaseScorer):
+class EqualityScoringFn(BaseScoringFn):
     """
-    A scorer that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
+    A scoring_fn that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
     """
 
-    scoring_function_def = ScoringFunctionDef(
+    scoring_function_def = ScoringFnDef(
         identifier="equality",
         description="Returns 1.0 if the input is equal to the target, 0.0 otherwise.",
         parameters=[],
@@ -38,12 +41,4 @@ class EqualityScorer(BaseScorer):
         }
 
     def aggregate(self, scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
-        assert len(scoring_results) > 0, "Empty scoring results provided."
-        num_correct = sum(result["score"] for result in scoring_results)
-        avg_score = num_correct / len(scoring_results)
-
-        return {
-            "accuracy": avg_score,
-            "num_correct": num_correct,
-            "num_total": len(scoring_results),
-        }
+        return aggregate_accuracy(scoring_results)
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py
new file mode 100644
index 000000000..c7ee68e26
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+    BaseScoringFn,
+)
+from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
+from llama_stack.apis.scoring import *  # noqa: F401, F403
+from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+    aggregate_accuracy,
+)
+
+
+class SubsetOfScoringFn(BaseScoringFn):
+    """
+    Scoring function that marks a row 1.0 when its expected answer is contained
+    in its generated answer, and 0.0 otherwise.
+    """
+
+    scoring_function_def = ScoringFnDef(
+        identifier="subset_of",
+        description="Returns 1.0 if the expected is included in generated, 0.0 otherwise.",
+        parameters=[],
+        return_type=NumberType(),
+    )
+
+    def score_row(self, input_row: Dict[str, Any]) -> ScoringResultRow:
+        """Return {"score": 1.0 or 0.0} from an `in` containment check."""
+        # Both columns are required; the expected answer is checked first.
+        assert "expected_answer" in input_row, "Expected answer not found in input row."
+        assert (
+            "generated_answer" in input_row
+        ), "Generated answer not found in input row."
+
+        is_contained = input_row["expected_answer"] in input_row["generated_answer"]
+        return {"score": 1.0 if is_contained else 0.0}
+
+    def aggregate(self, scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
+        """Delegate to the shared accuracy aggregation helper."""
+        return aggregate_accuracy(scoring_results)
diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py
new file mode 100644
index 000000000..fc7c923d9
--- /dev/null
+++ b/llama_stack/providers/registry/eval.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List
+
+from llama_stack.distribution.datatypes import *  # noqa: F403
+
+
+def available_providers() -> List[ProviderSpec]:
+    """Return the provider specs registered for the eval API."""
+    impl_module = "llama_stack.providers.impls.meta_reference.eval"
+    required_apis = [Api.datasetio, Api.datasets, Api.scoring, Api.inference]
+
+    # Single inline provider; its config class is exported by the impl package.
+    meta_reference = InlineProviderSpec(
+        api=Api.eval,
+        provider_type="meta-reference",
+        pip_packages=[],
+        module=impl_module,
+        config_class="llama_stack.providers.impls.meta_reference.eval.MetaReferenceEvalConfig",
+        api_dependencies=required_apis,
+    )
+
+    return [meta_reference]
diff --git a/llama_stack/providers/tests/datasetio/test_dataset.csv b/llama_stack/providers/tests/datasetio/test_dataset.csv
index a1a250753..f682c6d3d 100644
--- a/llama_stack/providers/tests/datasetio/test_dataset.csv
+++ b/llama_stack/providers/tests/datasetio/test_dataset.csv
@@ -1,6 +1,6 @@
-input_query,generated_answer,expected_answer
-What is the capital of France?,London,Paris
-Who is the CEO of Meta?,Mark Zuckerberg,Mark Zuckerberg
-What is the largest planet in our solar system?,Jupiter,Jupiter
-What is the smallest country in the world?,China,Vatican City
-What is the currency of Japan?,Yen,Yen
+input_query,generated_answer,expected_answer,chat_completion_input
+What is the capital of France?,London,Paris,"[{'role': 'user', 'content': 'What is the capital of France?'}]"
+Who is the CEO of Meta?,Mark Zuckerberg,Mark Zuckerberg,"[{'role': 'user', 'content': 'Who is the CEO of Meta?'}]"
+What is the largest planet in our solar system?,Jupiter,Jupiter,"[{'role': 'user', 'content': 'What is the largest planet in our solar system?'}]"
+What is the smallest country in the world?,China,Vatican City,"[{'role': 'user', 'content': 'What is the smallest country in the world?'}]"
+What is the currency of Japan?,Yen,Yen,"[{'role': 'user', 'content': 'What is the currency of Japan?'}]"
diff --git a/llama_stack/providers/tests/datasetio/test_datasetio.py b/llama_stack/providers/tests/datasetio/test_datasetio.py
index 9a351ba30..9bd80f94d 100644
--- a/llama_stack/providers/tests/datasetio/test_datasetio.py
+++ b/llama_stack/providers/tests/datasetio/test_datasetio.py
@@ -61,20 +61,31 @@ def data_url_from_file(file_path: str) -> str:
     return data_url
 
 
-async def register_dataset(datasets_impl: Datasets):
+async def register_dataset(
+    datasets_impl: Datasets, for_generation=False, dataset_id="test_dataset"
+):
     test_file = Path(os.path.abspath(__file__)).parent / "test_dataset.csv"
     test_url = data_url_from_file(str(test_file))
+
+    if for_generation:
+        dataset_schema = {
+            "expected_answer": StringType(),
+            "chat_completion_input": ChatCompletionInputType(),
+        }
+    else:
+        dataset_schema = {
+            "expected_answer": StringType(),
+            "input_query": StringType(),
+            "generated_answer": StringType(),
+        }
+
     dataset = DatasetDefWithProvider(
-        identifier="test_dataset",
+        identifier=dataset_id,
         provider_id=os.environ["PROVIDER_ID"],
         url=URL(
             uri=test_url,
         ),
-        dataset_schema={
-            "generated_answer": StringType(),
-            "expected_answer": StringType(),
-            "input_query": StringType(),
-        },
+        dataset_schema=dataset_schema,
     )
     await datasets_impl.register_dataset(dataset)
 
diff --git a/llama_stack/providers/tests/eval/__init__.py b/llama_stack/providers/tests/eval/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/tests/eval/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/tests/eval/provider_config_example.yaml b/llama_stack/providers/tests/eval/provider_config_example.yaml
new file mode 100644
index 000000000..1576d2ef0
--- /dev/null
+++ b/llama_stack/providers/tests/eval/provider_config_example.yaml
@@ -0,0 +1,18 @@
+providers:
+  datasetio:
+  - provider_id: test-meta
+    provider_type: meta-reference
+    config: {}
+  scoring:
+    - provider_id: test-meta
+      provider_type: meta-reference
+      config: {}
+  eval:
+    - provider_id: test-meta
+      provider_type: meta-reference
+      config: {}
+  inference:
+    - provider_id: test-tgi
+      provider_type: remote::tgi
+      config:
+        url: http://127.0.0.1:5009
diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py
new file mode 100644
index 000000000..6b0d99a22
--- /dev/null
+++ b/llama_stack/providers/tests/eval/test_eval.py
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import pytest
+import pytest_asyncio
+
+from llama_stack.apis.common.type_system import *  # noqa: F403
+from llama_stack.apis.datasetio import *  # noqa: F403
+from llama_stack.apis.eval.eval import ModelCandidate
+from llama_stack.distribution.datatypes import *  # noqa: F403
+
+from llama_models.llama3.api import SamplingParams
+
+from llama_stack.providers.tests.datasetio.test_datasetio import register_dataset
+from llama_stack.providers.tests.resolver import resolve_impls_for_test
+
+# How to run this test:
+#
+# 1. Ensure you have a conda environment with the right dependencies installed. This is a bit tricky
+#    since it depends on the provider you are testing. On top of that you need
+#    `pytest` and `pytest-asyncio` installed.
+#
+# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing.
+#
+# 3. Run:
+#
+# ```bash
+# PROVIDER_ID=<your_provider> \
+#   PROVIDER_CONFIG=provider_config.yaml \
+#   pytest -s llama_stack/providers/tests/eval/test_eval.py \
+#   --tb=short --disable-warnings
+# ```
+
+
+@pytest_asyncio.fixture(scope="session")
+async def eval_settings():
+    """Resolve the eval provider stack once per session for the tests below."""
+    dependency_apis = [Api.datasetio, Api.scoring, Api.inference]
+    resolved = await resolve_impls_for_test(Api.eval, deps=dependency_apis)
+    return dict(
+        eval_impl=resolved[Api.eval],
+        scoring_impl=resolved[Api.scoring],
+        datasets_impl=resolved[Api.datasets],
+    )
+
+
+@pytest.mark.asyncio
+async def test_eval(eval_settings):
+    """End-to-end: register a dataset, run a batch eval, read back the job result."""
+    datasets_impl = eval_settings["datasets_impl"]
+    eval_impl = eval_settings["eval_impl"]
+
+    await register_dataset(
+        datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval"
+    )
+    registered = await datasets_impl.list_datasets()
+    assert len(registered) == 1
+
+    candidate = ModelCandidate(
+        model="Llama3.2-1B-Instruct",
+        sampling_params=SamplingParams(),
+    )
+    job = await eval_impl.evaluate_batch(
+        dataset_id=registered[0].identifier,
+        candidate=candidate,
+        scoring_functions=["subset_of"],
+    )
+    # The first job stored by the provider gets id "0".
+    assert job.job_id == "0"
+
+    job_status = await eval_impl.job_status(job.job_id)
+    assert job_status and job_status.value == "completed"
+
+    eval_response = await eval_impl.job_result(job.job_id)
+    assert eval_response is not None
+    assert len(eval_response.generations) == 5
+    assert "subset_of" in eval_response.scores
diff --git a/tests/examples/evals-tgi-run.yaml b/tests/examples/evals-tgi-run.yaml
index e56c43420..e63523889 100644
--- a/tests/examples/evals-tgi-run.yaml
+++ b/tests/examples/evals-tgi-run.yaml
@@ -14,7 +14,12 @@ apis:
 - datasets
 - datasetio
 - scoring
+- eval
 providers:
+  eval:
+  - provider_id: meta0
+    provider_type: meta-reference
+    config: {}
   scoring:
   - provider_id: meta0
     provider_type: meta-reference