diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 0f223b51b..fd7d767ae 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -909,59 +909,6 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/evaluations": { - "post": { - "responses": { - "200": { - "description": "EvaluateResponse object containing generations and scores", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Evaluate a list of rows on a benchmark.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateRowsRequest" - } - } - }, - "required": true - } - } - }, "/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}": { "get": { "responses": { @@ -1396,48 +1343,6 @@ ] } }, - "/v1/scoring-functions/{scoring_fn_id}": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoringFn" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [ - { - "name": "scoring_fn_id", - "in": "path", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/shields/{identifier}": { "get": { "responses": { @@ -2372,153 +2277,6 @@ ] } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { - "get": { - "responses": { - "200": { - "description": "The status of the evaluationjob.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobStatus" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the status of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the status of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Cancel 
a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to cancel.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { - "get": { - "responses": { - "200": { - "description": "The result of the job.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the result of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the result of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/agents/{agent_id}/sessions": { "get": { "responses": { @@ -3050,73 +2808,6 @@ ] } }, - "/v1/scoring-functions": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListScoringFunctionsResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [] - }, - "post": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "ScoringFunctions" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RegisterScoringFunctionRequest" - } - } - }, - "required": true - } - } - }, "/v1/shields": { "get": { "responses": { @@ -3744,59 +3435,6 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs": { - "post": { - "responses": { - "200": { - "description": "The job that was created to run the evaluation.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Job" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Run an evaluation on a benchmark.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - 
"content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RunEvalRequest" - } - } - }, - "required": true - } - } - }, "/v1/safety/run-shield": { "post": { "responses": { @@ -3919,92 +3557,6 @@ } } }, - "/v1/scoring/score": { - "post": { - "responses": { - "200": { - "description": "ScoreResponse object containing rows and aggregated results", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Scoring" - ], - "description": "Score a list of rows.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreRequest" - } - } - }, - "required": true - } - } - }, - "/v1/scoring/score-batch": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreBatchResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Scoring" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreBatchRequest" - } - } - }, - "required": true - } - } - }, "/v1/post-training/supervised-fine-tune": { "post": { "responses": { @@ -6630,381 +6182,6 @@ "title": "EmbeddingsResponse", "description": "Response containing generated embeddings." }, - "AgentCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent", - "default": "agent" - }, - "config": { - "$ref": "#/components/schemas/AgentConfig", - "description": "The configuration for the agent candidate." - } - }, - "additionalProperties": false, - "required": [ - "type", - "config" - ], - "title": "AgentCandidate", - "description": "An agent candidate for evaluation." - }, - "AggregationFunctionType": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType" - }, - "BasicScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "basic", - "default": "basic" - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BasicScoringFnParams" - }, - "BenchmarkConfig": { - "type": "object", - "properties": { - "eval_candidate": { - "$ref": "#/components/schemas/EvalCandidate", - "description": "The candidate to evaluate." - }, - "scoring_params": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringFnParams" - }, - "description": "Map between scoring function id and parameters for each scoring function you want to run" - }, - "num_examples": { - "type": "integer", - "description": "(Optional) The number of examples to evaluate. 
If not provided, all examples in the dataset will be evaluated" - } - }, - "additionalProperties": false, - "required": [ - "eval_candidate", - "scoring_params" - ], - "title": "BenchmarkConfig", - "description": "A benchmark configuration for evaluation." - }, - "EvalCandidate": { - "oneOf": [ - { - "$ref": "#/components/schemas/ModelCandidate" - }, - { - "$ref": "#/components/schemas/AgentCandidate" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "model": "#/components/schemas/ModelCandidate", - "agent": "#/components/schemas/AgentCandidate" - } - } - }, - "LLMAsJudgeScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm_as_judge", - "default": "llm_as_judge" - }, - "judge_model": { - "type": "string" - }, - "prompt_template": { - "type": "string" - }, - "judge_score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type", - "judge_model" - ], - "title": "LLMAsJudgeScoringFnParams" - }, - "ModelCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "model", - "default": "model" - }, - "model": { - "type": "string", - "description": "The model ID to evaluate." - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams", - "description": "The sampling parameters for the model." - }, - "system_message": { - "$ref": "#/components/schemas/SystemMessage", - "description": "(Optional) The system message providing instructions or context to the model." - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "sampling_params" - ], - "title": "ModelCandidate", - "description": "A model candidate for evaluation." - }, - "RegexParserScoringFnParams": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "regex_parser", - "default": "regex_parser" - }, - "parsing_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AggregationFunctionType" - } - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "RegexParserScoringFnParams" - }, - "ScoringFnParams": { - "oneOf": [ - { - "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams" - }, - { - "$ref": "#/components/schemas/RegexParserScoringFnParams" - }, - { - "$ref": "#/components/schemas/BasicScoringFnParams" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams", - "regex_parser": "#/components/schemas/RegexParserScoringFnParams", - "basic": "#/components/schemas/BasicScoringFnParams" - } - } - }, - "EvaluateRowsRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to evaluate." - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The scoring functions to use for the evaluation." 
- }, - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions", - "benchmark_config" - ], - "title": "EvaluateRowsRequest" - }, - "EvaluateResponse": { - "type": "object", - "properties": { - "generations": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The generations from the evaluation." - }, - "scores": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "The scores from the evaluation." - } - }, - "additionalProperties": false, - "required": [ - "generations", - "scores" - ], - "title": "EvaluateResponse", - "description": "The response from an evaluation." - }, - "ScoringResult": { - "type": "object", - "properties": { - "score_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The scoring result for each row. Each row is a map of column name to value." - }, - "aggregated_results": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Map of metric name to aggregated value" - } - }, - "additionalProperties": false, - "required": [ - "score_rows", - "aggregated_results" - ], - "title": "ScoringResult", - "description": "A scoring result for a single row." 
- }, "Agent": { "type": "object", "properties": { @@ -7732,268 +6909,6 @@ ], "title": "ModelType" }, - "AgentTurnInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent_turn_input", - "default": "agent_turn_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "AgentTurnInputType" - }, - "ArrayType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "array", - "default": "array" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ArrayType" - }, - "BooleanType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "boolean", - "default": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BooleanType" - }, - "ChatCompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "chat_completion_input", - "default": "chat_completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ChatCompletionInputType" - }, - "CompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "completion_input", - "default": "completion_input" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "CompletionInputType" - }, - "JsonType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "json", - "default": "json" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "JsonType" - }, - "NumberType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "number", - "default": "number" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "NumberType" - }, - "ObjectType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "object", - "default": "object" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ObjectType" - }, - "ParamType": { - "oneOf": [ - { - "$ref": "#/components/schemas/StringType" - }, - { - "$ref": "#/components/schemas/NumberType" - }, - { - "$ref": "#/components/schemas/BooleanType" - }, - { - "$ref": "#/components/schemas/ArrayType" - }, - { - "$ref": "#/components/schemas/ObjectType" - }, - { - "$ref": "#/components/schemas/JsonType" - }, - { - "$ref": "#/components/schemas/UnionType" - }, - { - "$ref": "#/components/schemas/ChatCompletionInputType" - }, - { - "$ref": "#/components/schemas/CompletionInputType" - }, - { - "$ref": "#/components/schemas/AgentTurnInputType" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "string": "#/components/schemas/StringType", - "number": "#/components/schemas/NumberType", - "boolean": "#/components/schemas/BooleanType", - "array": "#/components/schemas/ArrayType", - "object": "#/components/schemas/ObjectType", - "json": "#/components/schemas/JsonType", - "union": "#/components/schemas/UnionType", - "chat_completion_input": "#/components/schemas/ChatCompletionInputType", - "completion_input": "#/components/schemas/CompletionInputType", - "agent_turn_input": "#/components/schemas/AgentTurnInputType" - } - } - }, - "ScoringFn": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "scoring_function", - "default": 
"scoring_function" - }, - "description": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_resource_id", - "provider_id", - "type", - "metadata", - "return_type" - ], - "title": "ScoringFn" - }, - "StringType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "string", - "default": "string" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "StringType" - }, - "UnionType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "union", - "default": "union" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "UnionType" - }, "Shield": { "type": "object", "properties": { @@ -8564,6 +7479,26 @@ ], "title": "GradeRequest" }, + "AgentCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent", + "default": "agent" + }, + "agent_config": { + "$ref": "#/components/schemas/AgentConfig" + } + }, + "additionalProperties": false, + "required": [ + "type", + "agent_config" + ], + "title": "AgentCandidate", + "description": "An agent candidate for evaluation." + }, "EvaluationCandidate": { "oneOf": [ { @@ -8636,6 +7571,35 @@ ], "title": "EvaluationJob" }, + "ModelCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "model", + "default": "model" + }, + "model_id": { + "type": "string" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams", + "description": "The sampling parameters for the model." + }, + "system_message": { + "$ref": "#/components/schemas/SystemMessage", + "description": "(Optional) The system message providing instructions or context to the model." + } + }, + "additionalProperties": false, + "required": [ + "type", + "model_id", + "sampling_params" + ], + "title": "ModelCandidate", + "description": "A model candidate for evaluation." + }, "GradeSyncRequest": { "type": "object", "properties": { @@ -8698,6 +7662,73 @@ "title": "EvaluationResponse", "description": "A response to an inline evaluation." }, + "ScoringResult": { + "type": "object", + "properties": { + "scores": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The scoring result for each row. Each row is a map of grader column name to value." + }, + "metrics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Map of metric name to aggregated value." + } + }, + "additionalProperties": false, + "required": [ + "scores", + "metrics" + ], + "title": "ScoringResult", + "description": "A scoring result for a single row." 
+ }, "HealthInfo": { "type": "object", "properties": { @@ -9030,17 +8061,6 @@ "title": "IterrowsResponse", "description": "A paginated list of rows from a dataset." }, - "JobStatus": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "title": "JobStatus" - }, "ListAgentSessionsResponse": { "type": "object", "properties": { @@ -9301,22 +8321,6 @@ ], "title": "ListRoutesResponse" }, - "ListScoringFunctionsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoringFn" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListScoringFunctionsResponse" - }, "ListShieldsResponse": { "type": "object", "properties": { @@ -10501,36 +9505,6 @@ ], "title": "RegisterModelRequest" }, - "RegisterScoringFunctionRequest": { - "type": "object", - "properties": { - "scoring_fn_id": { - "type": "string" - }, - "description": { - "type": "string" - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "provider_scoring_fn_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "scoring_fn_id", - "description", - "return_type" - ], - "title": "RegisterScoringFunctionRequest" - }, "RegisterShieldRequest": { "type": "object", "properties": { @@ -10686,33 +9660,6 @@ ], "title": "RunRequest" }, - "RunEvalRequest": { - "type": "object", - "properties": { - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "benchmark_config" - ], - "title": "RunEvalRequest" - }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_id" - ], - "title": "Job" - }, "RunShieldRequest": { "type": "object", "properties": { @@ -10818,128 +9765,6 @@ ], "title": "SaveSpansToDatasetRequest" }, - "ScoreRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions" - ], - "title": "ScoreRequest" - }, - "ScoreResponse": { - "type": "object", - "properties": { - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult." - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreResponse", - "description": "The response from scoring." 
- }, - "ScoreBatchRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - } - }, - "save_results_dataset": { - "type": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "scoring_functions", - "save_results_dataset" - ], - "title": "ScoreBatchRequest" - }, - "ScoreBatchResponse": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - } - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreBatchResponse" - }, "LoraFinetuningConfig": { "type": "object", "properties": { @@ -11292,10 +10117,6 @@ { "name": "Datasets" }, - { - "name": "Eval", - "x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates." - }, { "name": "Evaluation" }, @@ -11326,12 +10147,6 @@ { "name": "Safety" }, - { - "name": "Scoring" - }, - { - "name": "ScoringFunctions" - }, { "name": "Shields" }, @@ -11363,7 +10178,6 @@ "Benchmarks", "DatasetIO", "Datasets", - "Eval", "Evaluation", "Files", "Graders", @@ -11373,8 +10187,6 @@ "PostTraining (Coming Soon)", "Providers", "Safety", - "Scoring", - "ScoringFunctions", "Shields", "SyntheticDataGeneration (Coming Soon)", "Telemetry", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 7c4ea81b8..402106208 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -622,43 +622,6 @@ paths: schema: $ref: '#/components/schemas/EmbeddingsRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/evaluations: - post: - responses: - '200': - description: >- - EvaluateResponse object containing generations and scores - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Evaluate a list of rows on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. 
- required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateRowsRequest' - required: true /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}: get: responses: @@ -956,34 +919,6 @@ paths: required: true schema: type: string - /v1/scoring-functions/{scoring_fn_id}: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ScoringFn' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: - - name: scoring_fn_id - in: path - required: true - schema: - type: string /v1/shields/{identifier}: get: responses: @@ -1627,109 +1562,6 @@ paths: required: false schema: type: integer - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: - get: - responses: - '200': - description: The status of the evaluationjob. - content: - application/json: - schema: - $ref: '#/components/schemas/JobStatus' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the status of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Cancel a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: - get: - responses: - '200': - description: The result of the job. - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the result of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. 
- required: true - schema: - type: string /v1/agents/{agent_id}/sessions: get: responses: @@ -2098,53 +1930,6 @@ paths: required: false schema: $ref: '#/components/schemas/URL' - /v1/scoring-functions: - get: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ListScoringFunctionsResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: [] - post: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - ScoringFunctions - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RegisterScoringFunctionRequest' - required: true /v1/shields: get: responses: @@ -2581,43 +2366,6 @@ paths: schema: $ref: '#/components/schemas/RunRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/jobs: - post: - responses: - '200': - description: >- - The job that was created to run the evaluation. - content: - application/json: - schema: - $ref: '#/components/schemas/Job' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Run an evaluation on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RunEvalRequest' - required: true /v1/safety/run-shield: post: responses: @@ -2704,65 +2452,6 @@ paths: schema: $ref: '#/components/schemas/SaveSpansToDatasetRequest' required: true - /v1/scoring/score: - post: - responses: - '200': - description: >- - ScoreResponse object containing rows and aggregated results - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Scoring - description: Score a list of rows. 
- parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreRequest' - required: true - /v1/scoring/score-batch: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreBatchResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Scoring - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreBatchRequest' - required: true /v1/post-training/supervised-fine-tune: post: responses: @@ -4652,251 +4341,6 @@ components: title: EmbeddingsResponse description: >- Response containing generated embeddings. - AgentCandidate: - type: object - properties: - type: - type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' - description: >- - The configuration for the agent candidate. - additionalProperties: false - required: - - type - - config - title: AgentCandidate - description: An agent candidate for evaluation. - AggregationFunctionType: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - BasicScoringFnParams: - type: object - properties: - type: - type: string - const: basic - default: basic - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - title: BasicScoringFnParams - BenchmarkConfig: - type: object - properties: - eval_candidate: - $ref: '#/components/schemas/EvalCandidate' - description: The candidate to evaluate. - scoring_params: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringFnParams' - description: >- - Map between scoring function id and parameters for each scoring function - you want to run - num_examples: - type: integer - description: >- - (Optional) The number of examples to evaluate. If not provided, all examples - in the dataset will be evaluated - additionalProperties: false - required: - - eval_candidate - - scoring_params - title: BenchmarkConfig - description: >- - A benchmark configuration for evaluation. - EvalCandidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - LLMAsJudgeScoringFnParams: - type: object - properties: - type: - type: string - const: llm_as_judge - default: llm_as_judge - judge_model: - type: string - prompt_template: - type: string - judge_score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - - judge_model - title: LLMAsJudgeScoringFnParams - ModelCandidate: - type: object - properties: - type: - type: string - const: model - default: model - model: - type: string - description: The model ID to evaluate. - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. 
- system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - description: A model candidate for evaluation. - RegexParserScoringFnParams: - type: object - properties: - type: - type: string - const: regex_parser - default: regex_parser - parsing_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - $ref: '#/components/schemas/AggregationFunctionType' - additionalProperties: false - required: - - type - title: RegexParserScoringFnParams - ScoringFnParams: - oneOf: - - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams' - - $ref: '#/components/schemas/RegexParserScoringFnParams' - - $ref: '#/components/schemas/BasicScoringFnParams' - discriminator: - propertyName: type - mapping: - llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams' - regex_parser: '#/components/schemas/RegexParserScoringFnParams' - basic: '#/components/schemas/BasicScoringFnParams' - EvaluateRowsRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to evaluate. - scoring_functions: - type: array - items: - type: string - description: >- - The scoring functions to use for the evaluation. - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - input_rows - - scoring_functions - - benchmark_config - title: EvaluateRowsRequest - EvaluateResponse: - type: object - properties: - generations: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The generations from the evaluation. - scores: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: The scores from the evaluation. - additionalProperties: false - required: - - generations - - scores - title: EvaluateResponse - description: The response from an evaluation. - ScoringResult: - type: object - properties: - score_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The scoring result for each row. Each row is a map of column name to value. - aggregated_results: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Map of metric name to aggregated value - additionalProperties: false - required: - - score_rows - - aggregated_results - title: ScoringResult - description: A scoring result for a single row. 
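(For reference: the ScoringResult schema removed above, with its score_rows and aggregated_results fields, is superseded later in this diff by a ScoringResult defined under the new Evaluation API whose fields are named scores and metrics. Below is a minimal Python sketch of mapping a payload from the old shape to the new one; the helper name and the example row/metric keys are made up for illustration, but both field sets are taken verbatim from this diff.)

from typing import Any, Dict


def migrate_scoring_result(old: Dict[str, Any]) -> Dict[str, Any]:
    # Old shape (removed above):  {"score_rows": [...], "aggregated_results": {...}}
    # New shape (added below):    {"scores": [...],     "metrics": {...}}
    return {
        "scores": old["score_rows"],
        "metrics": old["aggregated_results"],
    }


old_result = {
    "score_rows": [{"answer_correctness": 1.0}],  # hypothetical grader column
    "aggregated_results": {"accuracy": 1.0},      # hypothetical metric
}
assert migrate_scoring_result(old_result) == {
    "scores": [{"answer_correctness": 1.0}],
    "metrics": {"accuracy": 1.0},
}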
Agent: type: object properties: @@ -5392,179 +4836,6 @@ components: - llm - embedding title: ModelType - AgentTurnInputType: - type: object - properties: - type: - type: string - const: agent_turn_input - default: agent_turn_input - additionalProperties: false - required: - - type - title: AgentTurnInputType - ArrayType: - type: object - properties: - type: - type: string - const: array - default: array - additionalProperties: false - required: - - type - title: ArrayType - BooleanType: - type: object - properties: - type: - type: string - const: boolean - default: boolean - additionalProperties: false - required: - - type - title: BooleanType - ChatCompletionInputType: - type: object - properties: - type: - type: string - const: chat_completion_input - default: chat_completion_input - additionalProperties: false - required: - - type - title: ChatCompletionInputType - CompletionInputType: - type: object - properties: - type: - type: string - const: completion_input - default: completion_input - additionalProperties: false - required: - - type - title: CompletionInputType - JsonType: - type: object - properties: - type: - type: string - const: json - default: json - additionalProperties: false - required: - - type - title: JsonType - NumberType: - type: object - properties: - type: - type: string - const: number - default: number - additionalProperties: false - required: - - type - title: NumberType - ObjectType: - type: object - properties: - type: - type: string - const: object - default: object - additionalProperties: false - required: - - type - title: ObjectType - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - ScoringFn: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - const: scoring_function - default: scoring_function - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - return_type: - $ref: '#/components/schemas/ParamType' - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - identifier - - provider_resource_id - - provider_id - - type - - metadata - - return_type - title: ScoringFn - StringType: - type: object - properties: - type: - type: string - const: string - default: string - additionalProperties: false - required: - - type - title: StringType - UnionType: - type: object - properties: - type: - type: string - const: 
union - default: union - additionalProperties: false - required: - - type - title: UnionType Shield: type: object properties: @@ -5947,6 +5218,21 @@ components: required: - task title: GradeRequest + AgentCandidate: + type: object + properties: + type: + type: string + const: agent + default: agent + agent_config: + $ref: '#/components/schemas/AgentConfig' + additionalProperties: false + required: + - type + - agent_config + title: AgentCandidate + description: An agent candidate for evaluation. EvaluationCandidate: oneOf: - $ref: '#/components/schemas/ModelCandidate' @@ -6000,6 +5286,30 @@ components: - task - candidate title: EvaluationJob + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model_id: + type: string + sampling_params: + $ref: '#/components/schemas/SamplingParams' + description: The sampling parameters for the model. + system_message: + $ref: '#/components/schemas/SystemMessage' + description: >- + (Optional) The system message providing instructions or context to the + model. + additionalProperties: false + required: + - type + - model_id + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. GradeSyncRequest: type: object properties: @@ -6044,6 +5354,41 @@ components: - scores title: EvaluationResponse description: A response to an inline evaluation. + ScoringResult: + type: object + properties: + scores: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The scoring result for each row. Each row is a map of grader column name + to value. + metrics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Map of metric name to aggregated value. + additionalProperties: false + required: + - scores + - metrics + title: ScoringResult + description: A scoring result for a single row. HealthInfo: type: object properties: @@ -6230,15 +5575,6 @@ components: - data title: IterrowsResponse description: A paginated list of rows from a dataset. 
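(The candidate schemas re-added above rename two fields relative to the removed Eval versions: ModelCandidate's model becomes model_id, and AgentCandidate's config becomes agent_config. A short illustrative sketch of payloads satisfying the new schemas follows; the identifier value is a placeholder, and empty SamplingParams/AgentConfig objects are assumed acceptable for brevity.)

# Payloads for the two EvaluationCandidate variants defined in this diff.
# The "type" property is the discriminator that selects the variant.
model_candidate = {
    "type": "model",
    "model_id": "my-model",  # renamed from "model" in the removed schema
    "sampling_params": {},   # SamplingParams object, left empty here
    # "system_message" is optional per the schema and omitted here
}

agent_candidate = {
    "type": "agent",
    "agent_config": {},      # renamed from "config"; an AgentConfig object
}

for candidate in (model_candidate, agent_candidate):
    assert candidate["type"] in ("model", "agent")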
- JobStatus: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - title: JobStatus ListAgentSessionsResponse: type: object properties: @@ -6431,17 +5767,6 @@ components: required: - data title: ListRoutesResponse - ListScoringFunctionsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ScoringFn' - additionalProperties: false - required: - - data - title: ListScoringFunctionsResponse ListShieldsResponse: type: object properties: @@ -7217,27 +6542,6 @@ components: required: - model_id title: RegisterModelRequest - RegisterScoringFunctionRequest: - type: object - properties: - scoring_fn_id: - type: string - description: - type: string - return_type: - $ref: '#/components/schemas/ParamType' - provider_scoring_fn_id: - type: string - provider_id: - type: string - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - scoring_fn_id - - description - - return_type - title: RegisterScoringFunctionRequest RegisterShieldRequest: type: object properties: @@ -7338,25 +6642,6 @@ components: - task - candidate title: RunRequest - RunEvalRequest: - type: object - properties: - benchmark_config: - $ref: '#/components/schemas/BenchmarkConfig' - description: The configuration for the benchmark. - additionalProperties: false - required: - - benchmark_config - title: RunEvalRequest - Job: - type: object - properties: - job_id: - type: string - additionalProperties: false - required: - - job_id - title: Job RunShieldRequest: type: object properties: @@ -7429,81 +6714,6 @@ components: - attributes_to_save - dataset_id title: SaveSpansToDatasetRequest - ScoreRequest: - type: object - properties: - input_rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The rows to score. - scoring_functions: - type: object - additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - description: >- - The scoring functions to use for the scoring. - additionalProperties: false - required: - - input_rows - - scoring_functions - title: ScoreRequest - ScoreResponse: - type: object - properties: - results: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - description: >- - A map of scoring function name to ScoringResult. - additionalProperties: false - required: - - results - title: ScoreResponse - description: The response from scoring. - ScoreBatchRequest: - type: object - properties: - dataset_id: - type: string - scoring_functions: - type: object - additionalProperties: - oneOf: - - $ref: '#/components/schemas/ScoringFnParams' - - type: 'null' - save_results_dataset: - type: boolean - additionalProperties: false - required: - - dataset_id - - scoring_functions - - save_results_dataset - title: ScoreBatchRequest - ScoreBatchResponse: - type: object - properties: - dataset_id: - type: string - results: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - additionalProperties: false - required: - - results - title: ScoreBatchResponse LoraFinetuningConfig: type: object properties: @@ -7734,9 +6944,6 @@ tags: - name: Benchmarks - name: DatasetIO - name: Datasets - - name: Eval - x-displayName: >- - Llama Stack Evaluation API for running evaluations on model and agent candidates. 
- name: Evaluation - name: Files - name: Graders @@ -7759,8 +6966,6 @@ tags: x-displayName: >- Providers API for inspecting, listing, and modifying providers and their configurations. - name: Safety - - name: Scoring - - name: ScoringFunctions - name: Shields - name: SyntheticDataGeneration (Coming Soon) - name: Telemetry @@ -7776,7 +6981,6 @@ x-tagGroups: - Benchmarks - DatasetIO - Datasets - - Eval - Evaluation - Files - Graders @@ -7786,8 +6990,6 @@ x-tagGroups: - PostTraining (Coming Soon) - Providers - Safety - - Scoring - - ScoringFunctions - Shields - SyntheticDataGeneration (Coming Soon) - Telemetry diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index d5854b53e..fea22a414 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -187,9 +187,7 @@ a default SQLite store will be used.""", benchmarks: List[BenchmarkInput] = Field(default_factory=list) tool_groups: List[ToolGroupInput] = Field(default_factory=list) - logging: Optional[LoggingConfig] = Field( - default=None, description="Configuration for Llama Stack Logging" - ) + logging: Optional[LoggingConfig] = Field(default=None, description="Configuration for Llama Stack Logging") server: ServerConfig = Field( default_factory=ServerConfig, @@ -200,9 +198,7 @@ a default SQLite store will be used.""", class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION - distribution_spec: DistributionSpec = Field( - description="The distribution spec to build including API providers. " - ) + distribution_spec: DistributionSpec = Field(description="The distribution spec to build including API providers. ") image_type: str = Field( default="conda", description="Type of package to build (conda | container | venv)", diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 2aa45322e..7e7237403 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -47,14 +47,9 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: def providable_apis() -> List[Api]: - routing_table_apis = { - x.routing_table_api for x in builtin_automatically_routed_apis() - } + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} return [ - api - for api in Api - if api not in routing_table_apis - and api not in [Api.inspect, Api.providers, Api.benchmarks] + api for api in Api if api not in routing_table_apis and api not in [Api.inspect, Api.providers, Api.benchmarks] ] diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index c08ee9881..3a6140478 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -103,9 +103,7 @@ async def resolve_impls( 2. Sorting them in dependency order. 3. Instantiating them with required dependencies. 
""" - routing_table_apis = { - x.routing_table_api for x in builtin_automatically_routed_apis() - } + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} router_apis = {x.router_api for x in builtin_automatically_routed_apis()} providers_with_specs = validate_and_prepare_providers( @@ -113,9 +111,7 @@ async def resolve_impls( ) apis_to_serve = run_config.apis or set( - list(providers_with_specs.keys()) - + [x.value for x in routing_table_apis] - + [x.value for x in router_apis] + list(providers_with_specs.keys()) + [x.value for x in routing_table_apis] + [x.value for x in router_apis] ) providers_with_specs.update(specs_for_autorouted_apis(apis_to_serve)) @@ -180,23 +176,17 @@ def validate_and_prepare_providers( for api_str, providers in run_config.providers.items(): api = Api(api_str) if api in routing_table_apis: - raise ValueError( - f"Provider for `{api_str}` is automatically provided and cannot be overridden" - ) + raise ValueError(f"Provider for `{api_str}` is automatically provided and cannot be overridden") specs = {} for provider in providers: if not provider.provider_id or provider.provider_id == "__disabled__": - logger.warning( - f"Provider `{provider.provider_type}` for API `{api}` is disabled" - ) + logger.warning(f"Provider `{provider.provider_type}` for API `{api}` is disabled") continue validate_provider(provider, api, provider_registry) p = provider_registry[api][provider.provider_type] - p.deps__ = [a.value for a in p.api_dependencies] + [ - a.value for a in p.optional_api_dependencies - ] + p.deps__ = [a.value for a in p.api_dependencies] + [a.value for a in p.optional_api_dependencies] spec = ProviderWithSpec(spec=p, **provider.model_dump()) specs[provider.provider_id] = spec @@ -206,14 +196,10 @@ def validate_and_prepare_providers( return providers_with_specs -def validate_provider( - provider: Provider, api: Api, provider_registry: ProviderRegistry -): +def validate_provider(provider: Provider, api: Api, provider_registry: ProviderRegistry): """Validates if the provider is allowed and handles deprecations.""" if provider.provider_type not in provider_registry[api]: - raise ValueError( - f"Provider `{provider.provider_type}` is not available for API `{api}`" - ) + raise ValueError(f"Provider `{provider.provider_type}` is not available for API `{api}`") p = provider_registry[api][provider.provider_type] if p.deprecation_error: @@ -288,9 +274,7 @@ async def instantiate_providers( ) -> Dict: """Instantiates providers asynchronously while managing dependencies.""" impls: Dict[Api, Any] = {} - inner_impls_by_provider_id: Dict[str, Dict[str, Any]] = { - f"inner-{x.value}": {} for x in router_apis - } + inner_impls_by_provider_id: Dict[str, Dict[str, Any]] = {f"inner-{x.value}": {} for x in router_apis} for api_str, provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies} for a in provider.spec.optional_api_dependencies: @@ -299,9 +283,7 @@ async def instantiate_providers( inner_impls = {} if isinstance(provider.spec, RoutingTableProviderSpec): - inner_impls = inner_impls_by_provider_id[ - f"inner-{provider.spec.router_api.value}" - ] + inner_impls = inner_impls_by_provider_id[f"inner-{provider.spec.router_api.value}"] impl = await instantiate_provider(provider, deps, inner_impls, dist_registry) @@ -359,9 +341,7 @@ async def instantiate_provider( provider_spec = provider.spec if not hasattr(provider_spec, "module"): - raise AttributeError( - f"ProviderSpec of type {type(provider_spec)} does not have a 
'module' attribute" - ) + raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute") module = importlib.import_module(provider_spec.module) args = [] @@ -398,10 +378,7 @@ async def instantiate_provider( # TODO: check compliance for special tool groups # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol check_protocol_compliance(impl, protocols[provider_spec.api]) - if ( - not isinstance(provider_spec, AutoRoutedProviderSpec) - and provider_spec.api in additional_protocols - ): + if not isinstance(provider_spec, AutoRoutedProviderSpec) and provider_spec.api in additional_protocols: additional_api, _, _ = additional_protocols[provider_spec.api] check_protocol_compliance(impl, additional_api) @@ -429,19 +406,12 @@ def check_protocol_compliance(obj: Any, protocol: Any) -> None: obj_params = set(obj_sig.parameters) obj_params.discard("self") if not (proto_params <= obj_params): - logger.error( - f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}" - ) + logger.error(f"Method {name} incompatible proto: {proto_params} vs. obj: {obj_params}") missing_methods.append((name, "signature_mismatch")) else: # Check if the method is actually implemented in the class - method_owner = next( - (cls for cls in mro if name in cls.__dict__), None - ) - if ( - method_owner is None - or method_owner.__name__ == protocol.__name__ - ): + method_owner = next((cls for cls in mro if name in cls.__dict__), None) + if method_owner is None or method_owner.__name__ == protocol.__name__: missing_methods.append((name, "not_actually_implemented")) if missing_methods: diff --git a/llama_stack/distribution/routers/__init__.py b/llama_stack/distribution/routers/__init__.py index 803c94a92..6e2287b87 100644 --- a/llama_stack/distribution/routers/__init__.py +++ b/llama_stack/distribution/routers/__init__.py @@ -44,9 +44,7 @@ async def get_routing_table_impl( return impl -async def get_auto_router_impl( - api: Api, routing_table: RoutingTable, deps: Dict[str, Any] -) -> Any: +async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any: from .routers import ( DatasetIORouter, EvalRouter, diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 369789a16..6c77d09e8 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -8,19 +8,12 @@ import time from typing import Any, AsyncGenerator, AsyncIterator, Dict, List, Optional, Union from llama_stack.apis.common.content_types import ( + URL, InterleavedContent, InterleavedContentItem, - URL, ) from llama_stack.apis.datasetio import DatasetIO, IterrowsResponse from llama_stack.apis.datasets import DatasetPurpose, DataSource -from llama_stack.apis.eval import ( - BenchmarkConfig, - Eval, - EvaluateResponse, - Job, - JobStatus, -) from llama_stack.apis.inference import ( ChatCompletionResponse, ChatCompletionResponseEventType, @@ -42,12 +35,6 @@ from llama_stack.apis.inference import ( ) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.scoring import ( - ScoreBatchResponse, - ScoreResponse, - Scoring, - ScoringFnParams, -) from llama_stack.apis.shields import Shield from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.apis.tools import ( @@ -94,9 +81,7 @@ class 
@@ -94,9 +81,7 @@ class VectorIORouter(VectorIO):
         provider_id: Optional[str] = None,
         provider_vector_db_id: Optional[str] = None,
     ) -> None:
-        logger.debug(
-            f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}"
-        )
+        logger.debug(f"VectorIORouter.register_vector_db: {vector_db_id}, {embedding_model}")
         await self.routing_table.register_vector_db(
             vector_db_id,
             embedding_model,
@@ -114,9 +99,7 @@ class VectorIORouter(VectorIO):
         logger.debug(
             f"VectorIORouter.insert_chunks: {vector_db_id}, {len(chunks)} chunks, ttl_seconds={ttl_seconds}, chunk_ids={[chunk.metadata['document_id'] for chunk in chunks[:3]]}{' and more...' if len(chunks) > 3 else ''}",
         )
-        return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(
-            vector_db_id, chunks, ttl_seconds
-        )
+        return await self.routing_table.get_provider_impl(vector_db_id).insert_chunks(vector_db_id, chunks, ttl_seconds)
 
     async def query_chunks(
         self,
@@ -125,9 +108,7 @@ class VectorIORouter(VectorIO):
         params: Optional[Dict[str, Any]] = None,
     ) -> QueryChunksResponse:
         logger.debug(f"VectorIORouter.query_chunks: {vector_db_id}")
-        return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(
-            vector_db_id, query, params
-        )
+        return await self.routing_table.get_provider_impl(vector_db_id).query_chunks(vector_db_id, query, params)
 
 
 class InferenceRouter(Inference):
@@ -164,9 +145,7 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.register_model: {model_id=} {provider_model_id=} {provider_id=} {metadata=} {model_type=}",
         )
-        await self.routing_table.register_model(
-            model_id, provider_model_id, provider_id, metadata, model_type
-        )
+        await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
 
     def _construct_metrics(
         self,
@@ -220,16 +199,11 @@ class InferenceRouter(Inference):
         total_tokens: int,
         model: Model,
     ) -> List[MetricInResponse]:
-        metrics = self._construct_metrics(
-            prompt_tokens, completion_tokens, total_tokens, model
-        )
+        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
         if self.telemetry:
             for metric in metrics:
                 await self.telemetry.log_event(metric)
-        return [
-            MetricInResponse(metric=metric.metric, value=metric.value)
-            for metric in metrics
-        ]
+        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
 
     async def _count_tokens(
         self,
@@ -254,9 +228,7 @@ class InferenceRouter(Inference):
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
         tool_config: Optional[ToolConfig] = None,
-    ) -> Union[
-        ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]
-    ]:
+    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
         logger.debug(
             f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
         )
@@ -266,19 +238,12 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.embedding:
-            raise ValueError(
-                f"Model '{model_id}' is an embedding model and does not support chat completions"
-            )
+            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
         if tool_config:
             if tool_choice and tool_choice != tool_config.tool_choice:
                 raise ValueError("tool_choice and tool_config.tool_choice must match")
-            if (
-                tool_prompt_format
-                and tool_prompt_format != tool_config.tool_prompt_format
-            ):
-                raise ValueError(
-                    "tool_prompt_format and tool_config.tool_prompt_format must match"
-                )
+            if tool_prompt_format and tool_prompt_format != tool_config.tool_prompt_format:
+                raise ValueError("tool_prompt_format and tool_config.tool_prompt_format must match")
         else:
             params = {}
             if tool_choice:
@@ -296,14 +261,9 @@ class InferenceRouter(Inference):
                 pass
             else:
                 # verify tool_choice is one of the tools
-                tool_names = [
-                    t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value
-                    for t in tools
-                ]
+                tool_names = [t.tool_name if isinstance(t.tool_name, str) else t.tool_name.value for t in tools]
                 if tool_config.tool_choice not in tool_names:
-                    raise ValueError(
-                        f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}"
-                    )
+                    raise ValueError(f"Tool choice {tool_config.tool_choice} is not one of the tools: {tool_names}")
 
         params = dict(
             model_id=model_id,
@@ -318,25 +278,17 @@ class InferenceRouter(Inference):
             tool_config=tool_config,
         )
         provider = self.routing_table.get_provider_impl(model_id)
-        prompt_tokens = await self._count_tokens(
-            messages, tool_config.tool_prompt_format
-        )
+        prompt_tokens = await self._count_tokens(messages, tool_config.tool_prompt_format)
 
         if stream:
 
             async def stream_generator():
                 completion_text = ""
                 async for chunk in await provider.chat_completion(**params):
-                    if (
-                        chunk.event.event_type
-                        == ChatCompletionResponseEventType.progress
-                    ):
+                    if chunk.event.event_type == ChatCompletionResponseEventType.progress:
                         if chunk.event.delta.type == "text":
                             completion_text += chunk.event.delta.text
-                    if (
-                        chunk.event.event_type
-                        == ChatCompletionResponseEventType.complete
-                    ):
+                    if chunk.event.event_type == ChatCompletionResponseEventType.complete:
                         completion_tokens = await self._count_tokens(
                             [
                                 CompletionMessage(
@@ -353,11 +305,7 @@ class InferenceRouter(Inference):
                             total_tokens,
                             model,
                         )
-                        chunk.metrics = (
-                            metrics
-                            if chunk.metrics is None
-                            else chunk.metrics + metrics
-                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
                     yield chunk
 
             return stream_generator()
@@ -374,9 +322,7 @@ class InferenceRouter(Inference):
                 total_tokens,
                 model,
             )
-            response.metrics = (
-                metrics if response.metrics is None else response.metrics + metrics
-            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
             return response
 
     async def completion(
@@ -397,9 +343,7 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.embedding:
-            raise ValueError(
-                f"Model '{model_id}' is an embedding model and does not support chat completions"
-            )
+            raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
         provider = self.routing_table.get_provider_impl(model_id)
         params = dict(
             model_id=model_id,
@@ -419,11 +363,7 @@ class InferenceRouter(Inference):
                 async for chunk in await provider.completion(**params):
                     if hasattr(chunk, "delta"):
                         completion_text += chunk.delta
-                    if (
-                        hasattr(chunk, "stop_reason")
-                        and chunk.stop_reason
-                        and self.telemetry
-                    ):
+                    if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry:
                         completion_tokens = await self._count_tokens(completion_text)
                         total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
                         metrics = await self._compute_and_log_token_usage(
@@ -432,11 +372,7 @@ class InferenceRouter(Inference):
                             total_tokens,
                             model,
                         )
-                        chunk.metrics = (
-                            metrics
-                            if chunk.metrics is None
-                            else chunk.metrics + metrics
-                        )
+                        chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
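Review note: after this rewrite, chat_completion and completion share the same streaming pattern, wrap the provider stream, accumulate delta text, and attach usage metrics to the terminal chunk. A simplified, runnable illustration of the idea; Chunk and the metric payload are stand-ins, not the llama-stack types.

import asyncio
from dataclasses import dataclass

@dataclass
class Chunk:
    delta: str
    stop_reason: str | None = None
    metrics: list[str] | None = None

async def provider_stream():
    for piece in ("hel", "lo"):
        yield Chunk(delta=piece)
    yield Chunk(delta="", stop_reason="end_of_turn")

async def stream_with_metrics():
    completion_text = ""
    async for chunk in provider_stream():
        completion_text += chunk.delta
        if chunk.stop_reason:
            # Same merge idiom as the diff: create or extend the metrics list.
            metrics = [f"completion_chars={len(completion_text)}"]
            chunk.metrics = metrics if chunk.metrics is None else chunk.metrics + metrics
        yield chunk

async def main():
    async for chunk in stream_with_metrics():
        print(chunk)

asyncio.run(main())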
                     yield chunk
 
             return stream_generator()
@@ -450,9 +386,7 @@ class InferenceRouter(Inference):
                 total_tokens,
                 model,
             )
-            response.metrics = (
-                metrics if response.metrics is None else response.metrics + metrics
-            )
+            response.metrics = metrics if response.metrics is None else response.metrics + metrics
             return response
 
     async def embeddings(
@@ -468,9 +402,7 @@ class InferenceRouter(Inference):
         if model is None:
             raise ValueError(f"Model '{model_id}' not found")
         if model.model_type == ModelType.llm:
-            raise ValueError(
-                f"Model '{model_id}' is an LLM model and does not support embeddings"
-            )
+            raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings")
         return await self.routing_table.get_provider_impl(model_id).embeddings(
             model_id=model_id,
             contents=contents,
@@ -504,9 +436,7 @@ class SafetyRouter(Safety):
         params: Optional[Dict[str, Any]] = None,
     ) -> Shield:
         logger.debug(f"SafetyRouter.register_shield: {shield_id}")
-        return await self.routing_table.register_shield(
-            shield_id, provider_shield_id, provider_id, params
-        )
+        return await self.routing_table.register_shield(shield_id, provider_shield_id, provider_id, params)
 
     async def run_shield(
         self,
@@ -607,9 +537,9 @@ class ToolRuntimeRouter(ToolRuntime):
             logger.debug(
                 f"ToolRuntimeRouter.RagToolImpl.insert: {vector_db_id}, {len(documents)} documents, chunk_size={chunk_size_in_tokens}"
             )
-            return await self.routing_table.get_provider_impl(
-                "insert_into_memory"
-            ).insert(documents, vector_db_id, chunk_size_in_tokens)
+            return await self.routing_table.get_provider_impl("insert_into_memory").insert(
+                documents, vector_db_id, chunk_size_in_tokens
+            )
 
     def __init__(
         self,
@@ -642,6 +572,4 @@ class ToolRuntimeRouter(ToolRuntime):
         self, tool_group_id: Optional[str] = None, mcp_endpoint: Optional[URL] = None
     ) -> List[ToolDef]:
         logger.debug(f"ToolRuntimeRouter.list_runtime_tools: {tool_group_id}")
-        return await self.routing_table.get_provider_impl(tool_group_id).list_tools(
-            tool_group_id, mcp_endpoint
-        )
+        return await self.routing_table.get_provider_impl(tool_group_id).list_tools(tool_group_id, mcp_endpoint)
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index 9aaf83483..69834868e 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -12,7 +12,6 @@ from pydantic import TypeAdapter
 
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.type_system import ParamType
 from llama_stack.apis.datasets import (
     Dataset,
     DatasetPurpose,
@@ -95,9 +94,7 @@ class CommonRoutingTableImpl(RoutingTable):
         self.dist_registry = dist_registry
 
     async def initialize(self) -> None:
-        async def add_objects(
-            objs: List[RoutableObjectWithProvider], provider_id: str, cls
-        ) -> None:
+        async def add_objects(objs: List[RoutableObjectWithProvider], provider_id: str, cls) -> None:
             for obj in objs:
                 if cls is None:
                     obj.provider_id = provider_id
@@ -126,9 +123,7 @@ class CommonRoutingTableImpl(RoutingTable):
         for p in self.impls_by_provider_id.values():
             await p.shutdown()
 
-    def get_provider_impl(
-        self, routing_key: str, provider_id: Optional[str] = None
-    ) -> Any:
+    def get_provider_impl(self, routing_key: str, provider_id: Optional[str] = None) -> Any:
         def apiname_object():
             if isinstance(self, ModelsRoutingTable):
                 return ("Inference", "model")
@@ -164,9 +159,7 @@ class CommonRoutingTableImpl(RoutingTable):
 
         raise ValueError(f"Provider not found for `{routing_key}`")
 
-    async def get_object_by_identifier(
-        self, type: str, identifier: str
-    ) -> Optional[RoutableObjectWithProvider]:
+    async def get_object_by_identifier(self, type: str, identifier: str) -> Optional[RoutableObjectWithProvider]:
         # Get from disk registry
         obj = await self.dist_registry.get(type, identifier)
         if not obj:
@@ -176,13 +169,9 @@ class CommonRoutingTableImpl(RoutingTable):
 
     async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
         await self.dist_registry.delete(obj.type, obj.identifier)
-        await unregister_object_from_provider(
-            obj, self.impls_by_provider_id[obj.provider_id]
-        )
+        await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
 
-    async def register_object(
-        self, obj: RoutableObjectWithProvider
-    ) -> RoutableObjectWithProvider:
+    async def register_object(self, obj: RoutableObjectWithProvider) -> RoutableObjectWithProvider:
         # if provider_id is not specified, pick an arbitrary one from existing entries
         if not obj.provider_id and len(self.impls_by_provider_id) > 0:
             obj.provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -240,9 +229,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
         if model_type is None:
             model_type = ModelType.llm
         if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
-            raise ValueError(
-                "Embedding model must have an embedding dimension in its metadata"
-            )
+            raise ValueError("Embedding model must have an embedding dimension in its metadata")
         model = Model(
             identifier=model_id,
             provider_resource_id=provider_model_id,
@@ -262,9 +249,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
 
 class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
     async def list_shields(self) -> ListShieldsResponse:
-        return ListShieldsResponse(
-            data=await self.get_all_with_type(ResourceType.shield.value)
-        )
+        return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
 
     async def get_shield(self, identifier: str) -> Shield:
         shield = await self.get_object_by_identifier("shield", identifier)
@@ -329,18 +314,14 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
                     f"No provider specified and multiple providers available. Arbitrarily selected the first provider {provider_id}."
                 )
             else:
-                raise ValueError(
-                    "No provider available. Please configure a vector_io provider."
-                )
+                raise ValueError("No provider available. Please configure a vector_io provider.")
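Review note: register_object and register_vector_db now share the same condensed fallback, if no provider_id is given, take the first configured provider and warn when the choice is ambiguous. A stripped-down version of that selection logic; pick_provider is an invented name, and the messages only echo the ones in the diff.

from typing import Any

def pick_provider(impls_by_provider_id: dict[str, Any], provider_id: str | None = None) -> str:
    if provider_id:
        if provider_id not in impls_by_provider_id:
            raise ValueError(f"Provider `{provider_id}` not found")
        return provider_id
    if not impls_by_provider_id:
        raise ValueError("No provider available. Please configure a provider.")
    if len(impls_by_provider_id) > 1:
        print(f"Multiple providers available. Arbitrarily selected the first: {next(iter(impls_by_provider_id))}")
    return next(iter(impls_by_provider_id))

# dicts preserve insertion order, so the first configured provider wins:
assert pick_provider({"faiss": object(), "pgvector": object()}) == "faiss"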
 
         model = await self.get_object_by_identifier("model", embedding_model)
         if model is None:
             raise ValueError(f"Model {embedding_model} not found")
         if model.model_type != ModelType.embedding:
             raise ValueError(f"Model {embedding_model} is not an embedding model")
         if "embedding_dimension" not in model.metadata:
-            raise ValueError(
-                f"Model {embedding_model} does not have an embedding dimension"
-            )
+            raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
         vector_db_data = {
             "identifier": vector_db_id,
             "type": ResourceType.vector_db.value,
@@ -362,9 +343,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
 
 class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
     async def list_datasets(self) -> ListDatasetsResponse:
-        return ListDatasetsResponse(
-            data=await self.get_all_with_type(ResourceType.dataset.value)
-        )
+        return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value))
 
     async def get_dataset(self, dataset_id: str) -> Dataset:
         dataset = await self.get_object_by_identifier("dataset", dataset_id)
@@ -447,9 +426,7 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
         # TODO (xiyan): we will need a way to infer provider_id for evaluation
         # keep it as meta-reference for now
         if len(self.impls_by_provider_id) == 0:
-            raise ValueError(
-                "No evaluation providers available. Please configure an evaluation provider."
-            )
+            raise ValueError("No evaluation providers available. Please configure an evaluation provider.")
         provider_id = list(self.impls_by_provider_id.keys())[0]
 
         benchmark = Benchmark(
@@ -491,12 +468,8 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
         args: Optional[Dict[str, Any]] = None,
     ) -> None:
         tools = []
-        tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(
-            toolgroup_id, mcp_endpoint
-        )
-        tool_host = (
-            ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution
-        )
+        tool_defs = await self.impls_by_provider_id[provider_id].list_runtime_tools(toolgroup_id, mcp_endpoint)
+        tool_host = ToolHost.model_context_protocol if mcp_endpoint else ToolHost.distribution
 
         for tool_def in tool_defs:
             tools.append(
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 9ec52bce0..90f55fc87 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -105,9 +105,7 @@ class EnvVarError(Exception):
     def __init__(self, var_name: str, path: str = ""):
         self.var_name = var_name
         self.path = path
-        super().__init__(
-            f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}"
-        )
+        super().__init__(f"Environment variable '{var_name}' not set or empty{f' at {path}' if path else ''}")
 
 
 def redact_sensitive_fields(data: Dict[str, Any]) -> Dict[str, Any]:
@@ -198,9 +196,7 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]:
         if not key:
             raise ValueError(f"Empty key in environment variable pair: {env_pair}")
         if not all(c.isalnum() or c == "_" for c in key):
-            raise ValueError(
-                f"Key must contain only alphanumeric characters and underscores: {key}"
-            )
+            raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
         return key, value
     except ValueError as e:
         raise ValueError(
@@ -213,20 +209,14 @@ def validate_env_pair(env_pair: str) -> tuple[str, str]:
 async def construct_stack(
     run_config: StackRunConfig, provider_registry: Optional[ProviderRegistry] = None
 ) -> Dict[Api, Any]:
-    dist_registry, _ = await create_dist_registry(
-        run_config.metadata_store, run_config.image_name
-    )
-    impls = await resolve_impls(
-        run_config, provider_registry or get_provider_registry(), dist_registry
-    )
+    dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
+    impls = await resolve_impls(run_config, provider_registry or get_provider_registry(), dist_registry)
     await register_resources(run_config, impls)
     return impls
 
 
 def get_stack_run_config_from_template(template: str) -> StackRunConfig:
-    template_path = (
-        importlib.resources.files("llama_stack") / f"templates/{template}/run.yaml"
-    )
+    template_path = importlib.resources.files("llama_stack") / f"templates/{template}/run.yaml"
 
     with importlib.resources.as_file(template_path) as path:
         if not path.exists():
@@ -269,9 +259,7 @@ def run_config_from_adhoc_config_spec(
         # call method "sample_run_config" on the provider spec config class
         provider_config_type = instantiate_class_type(provider_spec.config_class)
-        provider_config = replace_env_vars(
-            provider_config_type.sample_run_config(__distro_dir__=distro_dir)
-        )
+        provider_config = replace_env_vars(provider_config_type.sample_run_config(__distro_dir__=distro_dir))
 
         provider_configs_by_api[api_str] = [
             Provider(
diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/distribution/ui/modules/api.py
index 0e2e1d14f..1746a8a4f 100644
--- a/llama_stack/distribution/ui/modules/api.py
+++ b/llama_stack/distribution/ui/modules/api.py
@@ -22,9 +22,7 @@ class LlamaStackApi:
             },
         )
 
-    def run_scoring(
-        self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]
-    ):
+    def run_scoring(self, row, scoring_function_ids: list[str], scoring_params: Optional[dict]):
         """Run scoring on a single row"""
         if not scoring_params:
             scoring_params = {fn_id: None for fn_id in scoring_function_ids}
diff --git a/llama_stack/distribution/ui/page/distribution/resources.py b/llama_stack/distribution/ui/page/distribution/resources.py
index da42c468c..28f35fbd0 100644
--- a/llama_stack/distribution/ui/page/distribution/resources.py
+++ b/llama_stack/distribution/ui/page/distribution/resources.py
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from streamlit_option_menu import option_menu
+
 from llama_stack.distribution.ui.page.distribution.datasets import datasets
 from llama_stack.distribution.ui.page.distribution.eval_tasks import benchmarks
 from llama_stack.distribution.ui.page.distribution.models import models
 from llama_stack.distribution.ui.page.distribution.shields import shields
 from llama_stack.distribution.ui.page.distribution.vector_dbs import vector_dbs
-from streamlit_option_menu import option_menu
 
 
 def resources_page():
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index ac41df000..d9b129a8b 100644
--- a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -20,7 +20,5 @@ context_entity_recall_fn_def = ScoringFn(
     provider_id="braintrust",
     provider_resource_id="context-entity-recall",
     return_type=NumberType(),
-    params=BasicScoringFnParams(
-        aggregation_functions=[AggregationFunctionType.average]
-    ),
+    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.average]),
 )
diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py
deleted file mode 100644
index 755d30382..000000000
--- a/llama_stack/providers/registry/eval.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import List
-
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
-
-
-def available_providers() -> List[ProviderSpec]:
-    return [
-        InlineProviderSpec(
-            api=Api.eval,
-            provider_type="inline::meta-reference",
-            pip_packages=["tree_sitter"],
-            module="llama_stack.providers.inline.eval.meta_reference",
-            config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-                Api.scoring,
-                Api.inference,
-                Api.agents,
-            ],
-        ),
-    ]
diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py
deleted file mode 100644
index ca09be984..000000000
--- a/llama_stack/providers/registry/scoring.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import List
-
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
-
-
-def available_providers() -> List[ProviderSpec]:
-    return [
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::basic",
-            pip_packages=[],
-            module="llama_stack.providers.inline.scoring.basic",
-            config_class="llama_stack.providers.inline.scoring.basic.BasicScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-            ],
-        ),
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::llm-as-judge",
-            pip_packages=[],
-            module="llama_stack.providers.inline.scoring.llm_as_judge",
-            config_class="llama_stack.providers.inline.scoring.llm_as_judge.LlmAsJudgeScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-                Api.inference,
-            ],
-        ),
-        InlineProviderSpec(
-            api=Api.scoring,
-            provider_type="inline::braintrust",
-            pip_packages=["autoevals", "openai"],
-            module="llama_stack.providers.inline.scoring.braintrust",
-            config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
-            api_dependencies=[
-                Api.datasetio,
-                Api.datasets,
-            ],
-            provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator",
-        ),
-    ]
diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/llama_stack/providers/utils/common/data_schema_validator.py
index eb9d9dd60..3f8c4b111 100644
--- a/llama_stack/providers/utils/common/data_schema_validator.py
+++ b/llama_stack/providers/utils/common/data_schema_validator.py
@@ -75,29 +75,31 @@ VALID_SCHEMAS_FOR_EVAL = [
 ]
 
 
-def get_valid_schemas(api_str: str):
-    if api_str == Api.scoring.value:
-        return VALID_SCHEMAS_FOR_SCORING
-    elif api_str == Api.eval.value:
-        return VALID_SCHEMAS_FOR_EVAL
-    else:
-        raise ValueError(f"Invalid API string: {api_str}")
+# TODO(xiyan): add this back
+
+# def get_valid_schemas(api_str: str):
+#     if api_str == Api.scoring.value:
+#         return VALID_SCHEMAS_FOR_SCORING
+#     elif api_str == Api.eval.value:
+#         return VALID_SCHEMAS_FOR_EVAL
+#     else:
+#         raise ValueError(f"Invalid API string: {api_str}")
 
 
-def validate_dataset_schema(
-    dataset_schema: Dict[str, Any],
-    expected_schemas: List[Dict[str, Any]],
-):
-    if dataset_schema not in expected_schemas:
-        raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
+# def validate_dataset_schema(
+#     dataset_schema: Dict[str, Any],
+#     expected_schemas: List[Dict[str, Any]],
+# ):
+#     if dataset_schema not in expected_schemas:
+#         raise ValueError(f"Dataset {dataset_schema} does not have a correct input schema in {expected_schemas}")
 
 
-def validate_row_schema(
-    input_row: Dict[str, Any],
-    expected_schemas: List[Dict[str, Any]],
-):
-    for schema in expected_schemas:
-        if all(key in input_row for key in schema):
-            return
+# def validate_row_schema(
+#     input_row: Dict[str, Any],
+#     expected_schemas: List[Dict[str, Any]],
+# ):
+#     for schema in expected_schemas:
+#         if all(key in input_row for key in schema):
+#             return
 
-    raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
+#     raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")
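Review note: the validators here are commented out rather than deleted (the TODO says they will come back). For context, the row check they performed reduces to: a row passes if it contains every key of at least one expected schema. A runnable restatement follows; the sample schema keys are made up for illustration.

from typing import Any

def validate_row_schema(input_row: dict[str, Any], expected_schemas: list[dict[str, Any]]) -> None:
    # Accept the row as soon as one expected schema is fully covered by its keys.
    for schema in expected_schemas:
        if all(key in input_row for key in schema):
            return
    raise ValueError(f"Input row {input_row} does not match any of the expected schemas in {expected_schemas}")

validate_row_schema(
    {"input_query": "q", "generated_answer": "a"},
    [{"input_query": {}, "generated_answer": {}}],
)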
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index 61999c270..5a30e7189 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -11,8 +11,8 @@ from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOCon
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index 7d3fe7ca2..beacfc521 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -16,8 +16,8 @@ from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
 from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py
index 85523ef06..efb9647f7 100644
--- a/llama_stack/templates/ci-tests/ci_tests.py
+++ b/llama_stack/templates/ci-tests/ci_tests.py
@@ -22,8 +22,8 @@ from llama_stack.providers.remote.inference.fireworks.config import FireworksImp
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py
index dad8b6a8e..36ab22188 100644
--- a/llama_stack/templates/dev/dev.py
+++ b/llama_stack/templates/dev/dev.py
@@ -45,8 +45,8 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 )
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
@@ -96,10 +96,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
 def get_distribution_template() -> DistributionTemplate:
     inference_providers, available_models = get_inference_providers()
     providers = {
-        "inference": (
-            [p.provider_type for p in inference_providers]
-            + ["inline::sentence-transformers"]
-        ),
+        "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
@@ -119,9 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
         Provider(
             provider_id="sqlite-vec",
             provider_type="inline::sqlite-vec",
-            config=SQLiteVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}"
-            ),
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
             provider_id="${env.ENABLE_CHROMADB+chromadb}",
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 6ea73d3b8..9b33ebc7b 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -21,8 +21,8 @@ from llama_stack.providers.remote.inference.fireworks.config import FireworksImp
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py
index bde870c55..b51cceb0e 100644
--- a/llama_stack/templates/groq/groq.py
+++ b/llama_stack/templates/groq/groq.py
@@ -15,8 +15,8 @@ from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index f273c1a17..2cf8e98d4 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -17,8 +17,8 @@ from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
@@ -87,9 +87,7 @@ def get_distribution_template() -> DistributionTemplate:
                 ]
             },
             default_models=[inference_model, safety_model],
-            default_shields=[
-                ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")
-            ],
+            default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
             default_tool_groups=default_tool_groups,
         ),
     },
diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py
index 185fd867f..1009efa43 100644
--- a/llama_stack/templates/open-benchmark/open_benchmark.py
+++ b/llama_stack/templates/open-benchmark/open_benchmark.py
@@ -9,7 +9,6 @@ from typing import Dict, List, Tuple
 from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
-    BenchmarkInput,
     DatasetInput,
     ModelInput,
     Provider,
@@ -31,14 +30,12 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
 
 
-def get_inference_providers() -> (
-    Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]
-):
+def get_inference_providers() -> Tuple[List[Provider], Dict[str, List[ProviderModelEntry]]]:
     # in this template, we allow each API key to be optional
     providers = [
         (
@@ -119,9 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
         Provider(
             provider_id="sqlite-vec",
             provider_type="inline::sqlite-vec",
-            config=SQLiteVectorIOConfig.sample_run_config(
-                f"~/.llama/distributions/{name}"
-            ),
+            config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
         Provider(
             provider_id="${env.ENABLE_CHROMADB+chromadb}",
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index fbeeaad09..fbb9417b9 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -21,8 +21,8 @@ from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
 from llama_stack.templates.template import (
     DistributionTemplate,
-    get_model_registry,
     RunConfigSettings,
+    get_model_registry,
 )
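Review note: the remaining template hunks are mechanical import reordering (URL ahead of InterleavedContent, RunConfigSettings ahead of get_model_registry). That ordering is consistent with isort-style sorting with order_by_type enabled, constants first, then classes, then functions; the exact tool configuration is an assumption, but the rule can be demonstrated:

def isort_key(name: str) -> tuple[int, str]:
    # Constants (ALL_CAPS), then classes (CamelCase), then functions, similar to isort's order_by_type.
    if name.isupper():
        group = 0
    elif name[:1].isupper():
        group = 1
    else:
        group = 2
    return (group, name.lower())

names = ["InterleavedContentItem", "URL", "InterleavedContent", "get_model_registry", "RunConfigSettings"]
print(sorted(names, key=isort_key))
# ['URL', 'InterleavedContent', 'InterleavedContentItem', 'RunConfigSettings', 'get_model_registry']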