diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index bae5188fa..ec782c331 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,9 +1,18 @@ name: Integration tests on: - pull_request: push: - branches: [main] + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/integration/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/integration-tests.yml' # This workflow jobs: ollama: @@ -56,8 +65,7 @@ jobs: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | source .venv/bin/activate - # TODO: use "llama stack run" - nohup uv run python -m llama_stack.distribution.server.server --yaml-config ./llama_stack/templates/ollama/run.yaml > server.log 2>&1 & + nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index be4298a98..e6871bf99 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -40,6 +40,7 @@ jobs: matrix: template: ${{ fromJson(needs.generate-matrix.outputs.templates) }} image-type: [venv, container] + fail-fast: false # We want to run all jobs even if some fail steps: - name: Checkout repository @@ -67,7 +68,9 @@ jobs: - name: Run Llama Stack Build run: | - uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test + # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead + # LLAMA_STACK_DIR is set to the current directory so we are building from the source + USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test - name: Print dependencies in the image if: matrix.image-type == 'venv' diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index c7a30e9b8..6d6e91f22 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -5,6 +5,14 @@ on: branches: [ main ] pull_request: branches: [ main ] + paths: + - 'distributions/**' + - 'llama_stack/**' + - 'tests/unit/**' + - 'uv.lock' + - 'pyproject.toml' + - 'requirements.txt' + - '.github/workflows/unit-tests.yml' # This workflow workflow_dispatch: jobs: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 072fa21e2..e83e64672 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -77,7 +77,7 @@ repos: name: Distribution Template Codegen additional_dependencies: - uv==0.6.0 - entry: uv run --extra codegen python -m llama_stack.scripts.distro_codegen + entry: uv run --extra codegen ./scripts/distro_codegen.py language: python pass_filenames: false require_serial: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f188f259..e458fec0a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,7 +159,7 @@ LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama ### Updating Provider Configurations -If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `python llama_stack/scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. 
You should not change `docs/source/.../distributions/` files manually as they are auto-generated. +If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. ### Building the Documentation diff --git a/distributions/dependencies.json b/distributions/dependencies.json index d2ed12d3a..33b497a33 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -401,16 +401,13 @@ ], "nvidia": [ "aiosqlite", - "autoevals", "blobfile", "chardet", - "datasets", "faiss-cpu", "fastapi", "fire", "httpx", "matplotlib", - "mcp", "nltk", "numpy", "openai", diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 608237cfd..7df0c901e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -2233,6 +2233,67 @@ } }, "/v1/datasetio/iterrows/{dataset_id}": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/IterrowsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "DatasetIO" + ], + "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.", + "parameters": [ + { + "name": "dataset_id", + "in": "path", + "description": "The ID of the dataset to get the rows from.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "start_index", + "in": "query", + "description": "Index into dataset for the first row to get. Get all rows if None.", + "required": false, + "schema": { + "type": "integer" + } + }, + { + "name": "limit", + "in": "query", + "description": "The number of rows to get.", + "required": false, + "schema": { + "type": "integer" + } + } + ] + } + }, + "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { "get": { "responses": { "200": { @@ -6552,100 +6613,14 @@ "const": "factuality", "default": "factuality" }, - "factuality": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" - } - }, - "additionalProperties": false, - "required": [ - "type", - "factuality" - ], - "title": "FactualityGrader" - }, - "FaithfulnessGrader": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "faithfulness", - "default": "faithfulness" - }, - "faithfulness": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." 
- } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" - } - }, - "additionalProperties": false, - "required": [ - "type", - "faithfulness" - ], - "title": "FaithfulnessGrader" - }, - "Grader": { - "type": "object", - "properties": { - "identifier": { + "dataset_id": { "type": "string" }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "grader", - "default": "grader" - }, - "grader": { - "$ref": "#/components/schemas/GraderDefinition" - }, - "description": { - "type": "string" + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + } }, "metadata": { "type": "object", @@ -6679,98 +6654,163 @@ "provider_resource_id", "provider_id", "type", - "grader", + "dataset_id", + "scoring_functions", "metadata" ], - "title": "Grader" + "title": "Benchmark" }, - "GraderDefinition": { + "DataSource": { "oneOf": [ { - "$ref": "#/components/schemas/LlmGrader" + "$ref": "#/components/schemas/URIDataSource" }, { - "$ref": "#/components/schemas/RegexParserGrader" - }, - { - "$ref": "#/components/schemas/EqualityGrader" - }, - { - "$ref": "#/components/schemas/SubsetOfGrader" - }, - { - "$ref": "#/components/schemas/FactualityGrader" - }, - { - "$ref": "#/components/schemas/FaithfulnessGrader" + "$ref": "#/components/schemas/RowsDataSource" } ], "discriminator": { "propertyName": "type", "mapping": { - "llm": "#/components/schemas/LlmGrader", - "regex_parser": "#/components/schemas/RegexParserGrader", - "equality": "#/components/schemas/EqualityGrader", - "subset_of": "#/components/schemas/SubsetOfGrader", - "factuality": "#/components/schemas/FactualityGrader", - "faithfulness": "#/components/schemas/FaithfulnessGrader" + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" } } }, - "LlmGrader": { + "Grader": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "grader", + "default": "grader" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "title": "DatasetPurpose", + "description": "Purpose of the dataset. Each purpose has a required input data schema." 
+ }, + "source": { + "$ref": "#/components/schemas/DataSource" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_resource_id", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset" + }, + "RowsDataSource": { "type": "object", "properties": { "type": { "type": "string", - "const": "llm", - "default": "llm" + "const": "rows", + "default": "rows" }, - "llm": { - "type": "object", - "properties": { - "model": { - "type": "string" - }, - "prompt": { - "type": "string" - }, - "score_regexes": { - "type": "array", - "items": { - "type": "string" - } - }, - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." - } + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] } }, - "additionalProperties": false, - "required": [ - "model", - "prompt", - "score_regexes", - "aggregation_functions" - ], - "title": "LlmGraderParams" + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" } }, "additionalProperties": false, "required": [ "type", - "llm" + "rows" ], - "title": "LlmGrader" + "title": "RowsDataSource", + "description": "A dataset stored in rows." + }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." }, "RegexParserGrader": { "type": "object", @@ -6819,45 +6859,182 @@ ], "title": "RegexParserGrader" }, - "SubsetOfGrader": { + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding" + ], + "title": "ModelType" + }, + "AgentTurnInputType": { "type": "object", "properties": { "type": { "type": "string", - "const": "subset_of", - "default": "subset_of" - }, - "subset_of": { - "type": "object", - "properties": { - "aggregation_functions": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "average", - "median", - "categorical_count", - "accuracy" - ], - "title": "AggregationFunctionType", - "description": "A type of aggregation function." 
- } - } - }, - "additionalProperties": false, - "required": [ - "aggregation_functions" - ], - "title": "BasicGraderParams" + "const": "agent_turn_input", + "default": "agent_turn_input" } }, "additionalProperties": false, "required": [ - "type", - "subset_of" + "type" ], - "title": "SubsetOfGrader" + "title": "AgentTurnInputType" + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType" + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType" + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType" + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType" + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType" + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType" + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType" + }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } }, "Model": { "type": "object", @@ -6913,17 +7090,39 @@ "provider_id", "type", "metadata", - "model_type" + "return_type" ], - "title": "Model" + "title": "ScoringFn" }, - "ModelType": { - "type": "string", - "enum": [ - "llm", - "embedding" + "StringType": { + "type": "object", + "properties": { + "type": { + 
"type": "string", + "const": "string", + "default": "string" + } + }, + "additionalProperties": false, + "required": [ + "type" ], - "title": "ModelType" + "title": "StringType" + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType" }, "Shield": { "type": "object", @@ -8131,7 +8330,7 @@ }, "description": "The rows in the current page." }, - "next_index": { + "next_start_index": { "type": "integer", "description": "Index into dataset for the first row in the next page. None if there are no more rows." } @@ -9440,7 +9639,7 @@ }, "source": { "$ref": "#/components/schemas/DataSource", - "description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" }, "metadata": { "type": "object", @@ -9478,50 +9677,6 @@ "purpose", "source" ], - "title": "RegisterDatasetRequest" - }, - "RegisterGraderRequest": { - "type": "object", - "properties": { - "grader": { - "$ref": "#/components/schemas/GraderDefinition", - "description": "The grader definition, E.g. - { \"type\": \"llm\", \"llm\": { \"model\": \"llama-405b\", \"prompt\": \"You are a judge. Score the answer based on the question. {question} {answer}\", } }" - }, - "grader_id": { - "type": "string", - "description": "(Optional) The ID of the grader. If not provided, a random ID will be generated." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Any additional metadata for this grader. - E.g. { \"description\": \"A grader that scores the answer based on the question.\", }" - } - }, - "additionalProperties": false, - "required": [ - "grader" - ], "title": "RegisterGraderRequest" }, "RegisterModelRequest": { @@ -10199,9 +10354,6 @@ { "name": "Files" }, - { - "name": "Graders" - }, { "name": "Inference", "description": "This API provides the raw interface to the underlying models. 
Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", @@ -10254,9 +10406,8 @@ "Benchmarks", "DatasetIO", "Datasets", - "Evaluation", + "Eval", "Files", - "Graders", "Inference", "Inspect", "Models", diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 57bf76478..90b04b50a 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1507,6 +1507,50 @@ paths: $ref: '#/components/schemas/InvokeToolRequest' required: true /v1/datasetio/iterrows/{dataset_id}: + get: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/IterrowsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - DatasetIO + description: >- + Get a paginated list of rows from a dataset. Uses cursor-based pagination. + parameters: + - name: dataset_id + in: path + description: >- + The ID of the dataset to get the rows from. + required: true + schema: + type: string + - name: start_index + in: query + description: >- + Index into dataset for the first row to get. Get all rows if None. + required: false + schema: + type: integer + - name: limit + in: query + description: The number of rows to get. + required: false + schema: + type: integer + /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: get: responses: '200': @@ -4527,255 +4571,6 @@ components: title: URIDataSource description: >- A dataset that can be obtained from a URI. - EqualityGrader: - type: object - properties: - type: - type: string - const: equality - default: equality - equality: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - equality - title: EqualityGrader - FactualityGrader: - type: object - properties: - type: - type: string - const: factuality - default: factuality - factuality: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - factuality - title: FactualityGrader - FaithfulnessGrader: - type: object - properties: - type: - type: string - const: faithfulness - default: faithfulness - faithfulness: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. 
- additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - faithfulness - title: FaithfulnessGrader - Grader: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - const: grader - default: grader - grader: - $ref: '#/components/schemas/GraderDefinition' - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - identifier - - provider_resource_id - - provider_id - - type - - grader - - metadata - title: Grader - GraderDefinition: - oneOf: - - $ref: '#/components/schemas/LlmGrader' - - $ref: '#/components/schemas/RegexParserGrader' - - $ref: '#/components/schemas/EqualityGrader' - - $ref: '#/components/schemas/SubsetOfGrader' - - $ref: '#/components/schemas/FactualityGrader' - - $ref: '#/components/schemas/FaithfulnessGrader' - discriminator: - propertyName: type - mapping: - llm: '#/components/schemas/LlmGrader' - regex_parser: '#/components/schemas/RegexParserGrader' - equality: '#/components/schemas/EqualityGrader' - subset_of: '#/components/schemas/SubsetOfGrader' - factuality: '#/components/schemas/FactualityGrader' - faithfulness: '#/components/schemas/FaithfulnessGrader' - LlmGrader: - type: object - properties: - type: - type: string - const: llm - default: llm - llm: - type: object - properties: - model: - type: string - prompt: - type: string - score_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - model - - prompt - - score_regexes - - aggregation_functions - title: LlmGraderParams - additionalProperties: false - required: - - type - - llm - title: LlmGrader - RegexParserGrader: - type: object - properties: - type: - type: string - const: regex_parser - default: regex_parser - regex_parser: - type: object - properties: - parsing_regexes: - type: array - items: - type: string - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. - additionalProperties: false - required: - - parsing_regexes - - aggregation_functions - title: RegexParserGraderParams - additionalProperties: false - required: - - type - - regex_parser - title: RegexParserGrader - SubsetOfGrader: - type: object - properties: - type: - type: string - const: subset_of - default: subset_of - subset_of: - type: object - properties: - aggregation_functions: - type: array - items: - type: string - enum: - - average - - median - - categorical_count - - accuracy - title: AggregationFunctionType - description: A type of aggregation function. 
- additionalProperties: false - required: - - aggregation_functions - title: BasicGraderParams - additionalProperties: false - required: - - type - - subset_of - title: SubsetOfGrader Model: type: object properties: @@ -4817,6 +4612,224 @@ components: - llm - embedding title: ModelType + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + additionalProperties: false + required: + - type + title: AgentTurnInputType + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + additionalProperties: false + required: + - type + title: ArrayType + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + additionalProperties: false + required: + - type + title: BooleanType + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + additionalProperties: false + required: + - type + title: ChatCompletionInputType + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + additionalProperties: false + required: + - type + title: CompletionInputType + JsonType: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. + EqualityGrader: + type: object + properties: + type: + type: string + const: equality + default: equality + equality: + type: object + properties: + aggregation_functions: + type: array + items: + type: string + enum: + - average + - median + - categorical_count + - accuracy + title: AggregationFunctionType + description: A type of aggregation function. 
+ additionalProperties: false + required: + - aggregation_functions + title: BasicGraderParams + additionalProperties: false + required: + - type + title: ObjectType + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + const: scoring_function + default: scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_resource_id + - provider_id + - type + - grader + - metadata + - return_type + title: ScoringFn + StringType: + type: object + properties: + type: + type: string + const: string + default: string + additionalProperties: false + required: + - type + title: StringType + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + additionalProperties: false + required: + - type + title: UnionType Shield: type: object properties: @@ -5580,7 +5593,7 @@ components: - type: array - type: object description: The rows in the current page. - next_index: + next_start_index: type: integer description: >- Index into dataset for the first row in the next page. None if there are @@ -6461,12 +6474,14 @@ components: source: $ref: '#/components/schemas/DataSource' description: >- - The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" - } - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri", - "uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": - "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", - "rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"}, - {"role": "assistant", "content": "Hello, world!"}, ] } ] } + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. 
Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } metadata: type: object additionalProperties: @@ -6488,37 +6503,6 @@ components: - purpose - source title: RegisterDatasetRequest - RegisterGraderRequest: - type: object - properties: - grader: - $ref: '#/components/schemas/GraderDefinition' - description: >- - The grader definition, E.g. - { "type": "llm", "llm": { "model": "llama-405b", - "prompt": "You are a judge. Score the answer based on the question. {question} - {answer}", } } - grader_id: - type: string - description: >- - (Optional) The ID of the grader. If not provided, a random ID will be - generated. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Any additional metadata for this grader. - E.g. { "description": - "A grader that scores the answer based on the question.", } - additionalProperties: false - required: - - grader - title: RegisterGraderRequest RegisterModelRequest: type: object properties: @@ -6951,9 +6935,10 @@ tags: - name: Benchmarks - name: DatasetIO - name: Datasets - - name: Evaluation + - name: Eval + x-displayName: >- + Llama Stack Evaluation API for running evaluations on model and agent candidates. - name: Files - - name: Graders - name: Inference description: >- This API provides the raw interface to the underlying models. Two kinds of models @@ -6988,9 +6973,8 @@ x-tagGroups: - Benchmarks - DatasetIO - Datasets - - Evaluation + - Eval - Files - - Graders - Inference - Inspect - Models diff --git a/docs/source/contributing/new_api_provider.md b/docs/source/contributing/new_api_provider.md index a72f71319..c412a350b 100644 --- a/docs/source/contributing/new_api_provider.md +++ b/docs/source/contributing/new_api_provider.md @@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla - Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.) - Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally. - Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary. -- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`llama_stack/scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. 
+- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation. Here are some example PRs to help you get started: diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index efa0a2d74..774d5ec1b 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -6,13 +6,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | API | Provider(s) | |-----|-------------| | agents | `inline::meta-reference` | -| datasetio | `remote::huggingface`, `inline::localfs` | +| datasetio | `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::nvidia` | -| safety | `inline::llama-guard` | -| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| safety | `remote::nvidia` | +| scoring | `inline::basic` | | telemetry | `inline::meta-reference` | -| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | +| tool_runtime | `inline::rag-runtime` | | vector_io | `inline::faiss` | @@ -20,8 +20,10 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov The following environment variables can be configured: -- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`) - `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) +- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) +- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) +- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) ### Models diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md index 26b81cf92..0b84027f0 100644 --- a/docs/source/references/llama_stack_client_cli_reference.md +++ b/docs/source/references/llama_stack_client_cli_reference.md @@ -6,17 +6,32 @@ The `llama-stack-client` CLI allows you to query information about the distribut ### `llama-stack-client` ```bash -llama-stack-client -h +llama-stack-client +Usage: llama-stack-client [OPTIONS] COMMAND [ARGS]... -usage: llama-stack-client [-h] {models,memory_banks,shields} ... + Welcome to the LlamaStackClient CLI -Welcome to the LlamaStackClient CLI +Options: + --version Show the version and exit. + --endpoint TEXT Llama Stack distribution endpoint + --api-key TEXT Llama Stack distribution API key + --config TEXT Path to config file + --help Show this message and exit. -options: - -h, --help show this help message and exit - -subcommands: - {models,memory_banks,shields} +Commands: + configure Configure Llama Stack Client CLI. + datasets Manage datasets. + eval Run evaluation tasks. + eval_tasks Manage evaluation tasks. + inference Inference (chat). + inspect Inspect server configuration. + models Manage GenAI models. + post_training Post-training. 
+ providers Manage API providers. + scoring_functions Manage scoring functions. + shields Manage safety shield services. + toolgroups Manage available tool groups. + vector_dbs Manage vector databases. ``` ### `llama-stack-client configure` @@ -127,11 +142,11 @@ llama-stack-client vector_dbs list llama-stack-client vector_dbs register [--provider-id ] [--provider-vector-db-id ] [--embedding-model ] [--embedding-dimension ] ``` -Options: -- `--provider-id`: Optional. Provider ID for the vector db -- `--provider-vector-db-id`: Optional. Provider's vector db ID -- `--embedding-model`: Optional. Embedding model to use. Default: "all-MiniLM-L6-v2" -- `--embedding-dimension`: Optional. Dimension of embeddings. Default: 384 +Optional arguments: +- `--provider-id`: Provider ID for the vector db +- `--provider-vector-db-id`: Provider's vector db ID +- `--embedding-model`: Embedding model to use. Default: "all-MiniLM-L6-v2" +- `--embedding-dimension`: Dimension of embeddings. Default: 384 ### `llama-stack-client vector_dbs unregister` ```bash @@ -157,11 +172,13 @@ llama-stack-client shields list llama-stack-client shields register --shield-id [--provider-id ] [--provider-shield-id ] [--params ] ``` -Options: -- `--shield-id`: Required. ID of the shield -- `--provider-id`: Optional. Provider ID for the shield -- `--provider-shield-id`: Optional. Provider's shield ID -- `--params`: Optional. JSON configuration parameters for the shield +Required arguments: +- `--shield-id`: ID of the shield + +Optional arguments: +- `--provider-id`: Provider ID for the shield +- `--provider-shield-id`: Provider's shield ID +- `--params`: JSON configuration parameters for the shield ## Eval Task Management @@ -175,13 +192,15 @@ llama-stack-client benchmarks list llama-stack-client benchmarks register --eval-task-id --dataset-id --scoring-functions [ ...] [--provider-id ] [--provider-eval-task-id ] [--metadata ] ``` -Options: -- `--eval-task-id`: Required. ID of the eval task -- `--dataset-id`: Required. ID of the dataset to evaluate -- `--scoring-functions`: Required. One or more scoring functions to use for evaluation -- `--provider-id`: Optional. Provider ID for the eval task -- `--provider-eval-task-id`: Optional. Provider's eval task ID -- `--metadata`: Optional. Metadata for the eval task in JSON format +Required arguments: +- `--eval-task-id`: ID of the eval task +- `--dataset-id`: ID of the dataset to evaluate +- `--scoring-functions`: One or more scoring functions to use for evaluation + +Optional arguments: +- `--provider-id`: Provider ID for the eval task +- `--provider-eval-task-id`: Provider's eval task ID +- `--metadata`: Metadata for the eval task in JSON format ## Eval execution ### `llama-stack-client eval run-benchmark` @@ -189,11 +208,13 @@ Options: llama-stack-client eval run-benchmark [ ...] --eval-task-config --output-dir [--num-examples ] [--visualize] ``` -Options: -- `--eval-task-config`: Required. Path to the eval task config file in JSON format -- `--output-dir`: Required. Path to the directory where evaluation results will be saved -- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging) -- `--visualize`: Optional flag. 
If set, visualizes evaluation results after completion +Required arguments: +- `--eval-task-config`: Path to the eval task config file in JSON format +- `--output-dir`: Path to the directory where evaluation results will be saved + +Optional arguments: +- `--num-examples`: Number of examples to evaluate (useful for debugging) +- `--visualize`: If set, visualizes evaluation results after completion Example benchmark_config.json: ```json @@ -214,11 +235,13 @@ Example benchmark_config.json: llama-stack-client eval run-scoring --eval-task-config --output-dir [--num-examples ] [--visualize] ``` -Options: -- `--eval-task-config`: Required. Path to the eval task config file in JSON format -- `--output-dir`: Required. Path to the directory where scoring results will be saved -- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging) -- `--visualize`: Optional flag. If set, visualizes scoring results after completion +Required arguments: +- `--eval-task-config`: Path to the eval task config file in JSON format +- `--output-dir`: Path to the directory where scoring results will be saved + +Optional arguments: +- `--num-examples`: Number of examples to evaluate (useful for debugging) +- `--visualize`: If set, visualizes scoring results after completion ## Tool Group Management @@ -230,11 +253,11 @@ llama-stack-client toolgroups list +---------------------------+------------------+------+---------------+ | identifier | provider_id | args | mcp_endpoint | +===========================+==================+======+===============+ -| builtin::code_interpreter | code-interpreter | None | None | +| builtin::code_interpreter | code-interpreter | None | None | +---------------------------+------------------+------+---------------+ -| builtin::rag | rag-runtime | None | None | +| builtin::rag | rag-runtime | None | None | +---------------------------+------------------+------+---------------+ -| builtin::websearch | tavily-search | None | None | +| builtin::websearch | tavily-search | None | None | +---------------------------+------------------+------+---------------+ ``` @@ -250,11 +273,11 @@ Shows detailed information about a specific toolgroup. If the toolgroup is not f llama-stack-client toolgroups register [--provider-id ] [--provider-toolgroup-id ] [--mcp-config ] [--args ] ``` -Options: -- `--provider-id`: Optional. Provider ID for the toolgroup -- `--provider-toolgroup-id`: Optional. Provider's toolgroup ID -- `--mcp-config`: Optional. JSON configuration for the MCP endpoint -- `--args`: Optional. JSON arguments for the toolgroup +Optional arguments: +- `--provider-id`: Provider ID for the toolgroup +- `--provider-toolgroup-id`: Provider's toolgroup ID +- `--mcp-config`: JSON configuration for the MCP endpoint +- `--args`: JSON arguments for the toolgroup ### `llama-stack-client toolgroups unregister` ```bash diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py index 6079e5b99..b1eaffa17 100644 --- a/llama_stack/apis/datasetio/datasetio.py +++ b/llama_stack/apis/datasetio/datasetio.py @@ -18,11 +18,11 @@ class IterrowsResponse(BaseModel): A paginated list of rows from a dataset. :param data: The rows in the current page. - :param next_index: Index into dataset for the first row in the next page. None if there are no more rows. + :param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows. 
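+
+    Example (hypothetical client-side pagination loop; `datasetio` is an
+    assumed client handle and `process` a user-defined row handler):
+
+        start = 0
+        while start is not None:
+            page = await datasetio.iterrows(dataset_id, start_index=start, limit=100)
+            for row in page.data:
+                process(row)
+            start = page.next_start_index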
""" data: List[Dict[str, Any]] - next_index: Optional[int] = None + next_start_index: Optional[int] = None class DatasetStore(Protocol): @@ -46,9 +46,11 @@ class DatasetIO(Protocol): :param dataset_id: The ID of the dataset to get the rows from. :param start_index: Index into dataset for the first row to get. Get all rows if None. - :param limit: The number of rows to get per page. + :param limit: The number of rows to get. """ ... @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST") - async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ... + async def append_rows( + self, dataset_id: str, rows: List[Dict[str, Any]] + ) -> None: ... diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index 1dd29ad8e..d033d0b70 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -163,7 +163,7 @@ class Datasets(Protocol): ], "answer": "John Doe" } - :param source: The data source of the dataset. Examples: + :param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl" diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 3887bf4f9..d87e3bd0b 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -38,7 +38,7 @@ from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.resolver import InvalidProviderError from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type -from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty +from llama_stack.distribution.utils.exec import formulate_run_args, run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -213,7 +213,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: config = parse_and_maybe_upgrade_config(config_dict) run_args = formulate_run_args(args.image_type, args.image_name, config, args.template) run_args.extend([run_config, str(os.getenv("LLAMA_STACK_PORT", 8321))]) - run_with_pty(run_args) + run_command(run_args) def _generate_run_config( diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index e5686fb10..57a0b28cc 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -82,7 +82,7 @@ class StackRun(Subcommand): from llama_stack.distribution.configure import parse_and_maybe_upgrade_config from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR - from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty + from llama_stack.distribution.utils.exec import formulate_run_args, run_command config_file = Path(args.config) has_yaml_suffix = args.config.endswith(".yaml") @@ -136,4 +136,4 @@ class StackRun(Subcommand): if args.tls_keyfile and args.tls_certfile: run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) - run_with_pty(run_args) + run_command(run_args) diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 0e990d129..a8ee372da 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -6,7 +6,6 @@ import importlib.resources import logging -import sys from pathlib import Path from typing import 
Dict, List @@ -15,7 +14,7 @@ from termcolor import cprint from llama_stack.distribution.datatypes import BuildConfig, Provider from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.exec import run_command, run_with_pty +from llama_stack.distribution.utils.exec import run_command from llama_stack.distribution.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api @@ -123,11 +122,7 @@ def build_image( if special_deps: args.append("#".join(special_deps)) - is_terminal = sys.stdin.isatty() - if is_terminal: - return_code = run_with_pty(args) - else: - return_code = run_command(args) + return_code = run_command(args) if return_code != 0: log.error( diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 077b396cb..a8346c3b6 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -43,7 +43,7 @@ RED='\033[0;31m' NC='\033[0m' # No Color CONTAINER_BINARY=${CONTAINER_BINARY:-docker} -CONTAINER_OPTS=${CONTAINER_OPTS:-} +CONTAINER_OPTS=${CONTAINER_OPTS:---progress=plain} TEMP_DIR=$(mktemp -d) @@ -253,8 +253,7 @@ $CONTAINER_BINARY build \ "${CLI_ARGS[@]}" \ -t "$image_tag" \ -f "$TEMP_DIR/Containerfile" \ - "." \ - --progress=plain + "." # clean up tmp/configs set +x diff --git a/llama_stack/distribution/providers.py b/llama_stack/distribution/providers.py index fb2476767..cf9b0b975 100644 --- a/llama_stack/distribution/providers.py +++ b/llama_stack/distribution/providers.py @@ -8,10 +8,13 @@ from pydantic import BaseModel from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers +from llama_stack.log import get_logger from .datatypes import StackRunConfig from .stack import redact_sensitive_fields +logger = get_logger(name=__name__, category="core") + class ProviderImplConfig(BaseModel): run_config: StackRunConfig @@ -31,6 +34,10 @@ class ProviderImpl(Providers): async def initialize(self) -> None: pass + async def shutdown(self) -> None: + logger.debug("ProviderImpl.shutdown") + pass + async def list_providers(self) -> ListProvidersResponse: run_config = self.config.run_config safe_config = StackRunConfig(**redact_sensitive_fields(run_config.model_dump())) diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 86613dc9c..3bf3c81ce 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -4,13 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import errno import logging import os -import select import signal import subprocess -import sys from termcolor import cprint @@ -88,13 +85,6 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: return run_args -def run_with_pty(command): - if sys.platform.startswith("win"): - return _run_with_pty_win(command) - else: - return _run_with_pty_unix(command) - - def in_notebook(): try: from IPython import get_ipython @@ -108,19 +98,19 @@ def in_notebook(): return True -# run a command in a pseudo-terminal, with interrupt handling, -# useful when you want to run interactive things -def _run_with_pty_unix(command): - import pty - import termios +def run_command(command: list[str]) -> int: + """ + Run a command with interrupt handling and output capture. + Uses subprocess.run with direct stream piping for better performance. 
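+
+    In essence, a sketch of what the body below does:
+
+        result = subprocess.run(command, text=True, check=False)
+        return result.returncode
+
+    wrapped in save/restore of the original SIGINT handler.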
- master, slave = pty.openpty() + Args: + command (list): The command to run. - old_settings = termios.tcgetattr(sys.stdin) + Returns: + int: The return code of the command. + """ original_sigint = signal.getsignal(signal.SIGINT) - ctrl_c_pressed = False - process = None def sigint_handler(signum, frame): nonlocal ctrl_c_pressed @@ -131,106 +121,19 @@ def _run_with_pty_unix(command): # Set up the signal handler signal.signal(signal.SIGINT, sigint_handler) - new_settings = termios.tcgetattr(sys.stdin) - new_settings[3] = new_settings[3] & ~termios.ECHO # Disable echo - new_settings[3] = new_settings[3] & ~termios.ICANON # Disable canonical mode - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings) - - process = subprocess.Popen( + # Run the command with stdout/stderr piped directly to system streams + result = subprocess.run( command, - stdin=slave, - stdout=slave, - stderr=slave, - universal_newlines=True, - preexec_fn=os.setsid, + text=True, + check=False, ) - - # Close the slave file descriptor as it's now owned by the subprocess - os.close(slave) - - def handle_io(): - while not ctrl_c_pressed: - try: - rlist, _, _ = select.select([sys.stdin, master], [], [], 0.1) - - if sys.stdin in rlist: - data = os.read(sys.stdin.fileno(), 1024) - if not data: - break - os.write(master, data) - - if master in rlist: - data = os.read(master, 1024) - if not data: - break - sys.stdout.buffer.write(data) - sys.stdout.flush() - - except KeyboardInterrupt: - # This will be raised when Ctrl+C is pressed - break - - if process.poll() is not None: - break - - handle_io() - except (EOFError, KeyboardInterrupt): - pass - except OSError as e: - if e.errno != errno.EIO: - raise - finally: - # Clean up - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - signal.signal(signal.SIGINT, original_sigint) - - os.close(master) - if process and process.poll() is None: - process.terminate() - process.wait() - - return process.returncode - - -# run a command in a pseudo-terminal in windows, with interrupt handling, -def _run_with_pty_win(command): - """ - Runs a command with interactive support using subprocess directly. 
- """ - try: - # For shell scripts on Windows, use appropriate shell - if isinstance(command, (list, tuple)): - if command[0].endswith(".sh"): - if os.path.exists("/usr/bin/bash"): # WSL - command = ["bash"] + command - else: - # Use cmd.exe with bash while preserving all arguments - command = ["cmd.exe", "/c", "bash"] + command - - process = subprocess.Popen( - command, - shell=True, - universal_newlines=True, - ) - - process.wait() - + return result.returncode + except subprocess.SubprocessError as e: + log.error(f"Subprocess error: {e}") + return 1 except Exception as e: - print(f"Error: {str(e)}") + log.exception(f"Unexpected error: {e}") return 1 finally: - if process and process.poll() is None: - process.terminate() - process.wait() - return process.returncode - - -def run_command(command): - try: - result = subprocess.run(command, capture_output=True, text=True, check=True) - print("Script Output\n", result.stdout) - return result.returncode - except subprocess.CalledProcessError as e: - print("Error running script:", e) - print("Error output:", e.stderr) - return e.returncode + # Restore the original signal handler + signal.signal(signal.SIGINT, original_sigint) diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py index 3b0d01edd..958c7d387 100644 --- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -44,7 +44,9 @@ class PandasDataframeDataset: elif self.dataset_def.source.type == "rows": self.df = pandas.DataFrame(self.dataset_def.source.rows) else: - raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}") + raise ValueError( + f"Unsupported dataset source type: {self.dataset_def.source.type}" + ) if self.df is None: raise ValueError(f"Failed to load dataset from {self.dataset_def.url}") @@ -108,7 +110,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): return IterrowsResponse( data=rows, - next_index=end if end < len(dataset_impl) else None, + next_start_index=end if end < len(dataset_impl) else None, ) async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: @@ -117,4 +119,6 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): dataset_impl.load() new_rows_df = pandas.DataFrame(rows) - dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True) + dataset_impl.df = pandas.concat( + [dataset_impl.df, new_rows_df], ignore_index=True + ) diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index 1364352e6..32c0b4e98 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -55,4 +55,13 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", ), ), + remote_provider_spec( + api=Api.safety, + adapter=AdapterSpec( + adapter_type="nvidia", + pip_packages=["requests"], + module="llama_stack.providers.remote.safety.nvidia", + config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", + ), + ), ] diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py index 41ce747f7..db6edbce3 100644 --- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py +++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py @@ -86,7 +86,7 @@ class 
 
         return IterrowsResponse(
             data=rows,
-            next_index=end if end < len(loaded_dataset) else None,
+            next_start_index=end if end < len(loaded_dataset) else None,
         )
 
     async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
@@ -98,9 +98,13 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
         new_dataset = hf_datasets.Dataset.from_list(rows)
 
         # Concatenate the new rows with existing dataset
-        updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
+        updated_dataset = hf_datasets.concatenate_datasets(
+            [loaded_dataset, new_dataset]
+        )
 
         if dataset_def.metadata.get("path", None):
             updated_dataset.push_to_hub(dataset_def.metadata["path"])
         else:
-            raise NotImplementedError("Uploading to URL-based datasets is not supported yet")
+            raise NotImplementedError(
+                "Uploading to URL-based datasets is not supported yet"
+            )
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 8f3a0d147..96b2d73d8 100644
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -12,6 +12,7 @@ from llama_stack.apis.common.content_types import InterleavedContent
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseStreamChunk,
+    CompletionMessage,
     EmbeddingsResponse,
     EmbeddingTaskType,
     Inference,
@@ -160,12 +161,14 @@ class PassthroughInferenceAdapter(Inference):
         client = self._get_client()
         response = await client.inference.chat_completion(**json_params)
 
-        response = response.to_dict()
-
-        # temporary hack to remove the metrics from the response
-        response["metrics"] = []
-
-        return convert_to_pydantic(ChatCompletionResponse, response)
+        return ChatCompletionResponse(
+            completion_message=CompletionMessage(
+                content=response.completion_message.content.text,
+                stop_reason=response.completion_message.stop_reason,
+                tool_calls=response.completion_message.tool_calls,
+            ),
+            logprobs=response.logprobs,
+        )
 
     async def _stream_chat_completion(self, json_params: Dict[str, Any]) -> AsyncGenerator:
         client = self._get_client()
diff --git a/llama_stack/providers/remote/safety/nvidia/__init__.py b/llama_stack/providers/remote/safety/nvidia/__init__.py
new file mode 100644
index 000000000..4677268c6
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+from typing import Any
+
+from .config import NVIDIASafetyConfig
+
+
+async def get_adapter_impl(config: NVIDIASafetyConfig, _deps) -> Any:
+    from .nvidia import NVIDIASafetyAdapter
+
+    impl = NVIDIASafetyAdapter(config)
+    await impl.initialize()
+    return impl
diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/llama_stack/providers/remote/safety/nvidia/config.py
new file mode 100644
index 000000000..3df80ed4f
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/config.py
@@ -0,0 +1,37 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from llama_stack.schema_utils import json_schema_type
+
+
+@json_schema_type
+class NVIDIASafetyConfig(BaseModel):
+    """
+    Configuration for the NVIDIA Guardrail microservice endpoint.
+
+    Attributes:
+        guardrails_service_url (str): A base URL for accessing the NVIDIA guardrail endpoint, e.g. http://0.0.0.0:7331
+        config_id (str, optional): The ID of the guardrails configuration to use from the configuration store
+            (https://developer.nvidia.com/docs/nemo-microservices/guardrails/source/guides/configuration-store-guide.html)
+
+    """
+
+    guardrails_service_url: str = Field(
+        default_factory=lambda: os.getenv("GUARDRAILS_SERVICE_URL", "http://0.0.0.0:7331"),
+        description="The URL for accessing the guardrails service",
+    )
+    config_id: Optional[str] = Field(default="self-check", description="Config ID to use from the config store")
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "guardrails_service_url": "${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}",
+            "config_id": "self-check",
+        }
diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/llama_stack/providers/remote/safety/nvidia/nvidia.py
new file mode 100644
index 000000000..6da2a8344
--- /dev/null
+++ b/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -0,0 +1,154 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import logging
+from typing import Any, List, Optional
+
+import requests
+
+from llama_stack.apis.inference import Message
+from llama_stack.apis.safety import RunShieldResponse, Safety, SafetyViolation, ViolationLevel
+from llama_stack.apis.shields import Shield
+from llama_stack.distribution.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.datatypes import ShieldsProtocolPrivate
+
+from .config import NVIDIASafetyConfig
+
+logger = logging.getLogger(__name__)
+
+
+class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
+    def __init__(self, config: NVIDIASafetyConfig) -> None:
+        """
+        Initialize the NVIDIASafetyAdapter with a given safety configuration.
+
+        Args:
+            config (NVIDIASafetyConfig): The configuration containing the guardrails service URL and config ID.
+        """
+        logger.info(f"Initializing NVIDIASafetyAdapter({config.guardrails_service_url})...")
+        self.config = config
+
+    async def initialize(self) -> None:
+        pass
+
+    async def shutdown(self) -> None:
+        pass
+
+    async def register_shield(self, shield: Shield) -> None:
+        if not shield.provider_resource_id:
+            raise ValueError("Shield model not provided.")
+
+    async def run_shield(
+        self, shield_id: str, messages: List[Message], params: Optional[dict[str, Any]] = None
+    ) -> RunShieldResponse:
+        """
+        Run a safety shield check against the provided messages.
+
+        Args:
+            shield_id (str): The unique identifier for the shield to be used.
+            messages (List[Message]): A list of Message objects representing the conversation history.
+            params (Optional[dict[str, Any]]): Additional parameters for the shield check.
+
+        Returns:
+            RunShieldResponse: The response containing safety violation details if any.
+
+        Raises:
+            ValueError: If the shield with the provided shield_id is not found.
+ """ + shield = await self.shield_store.get_shield(shield_id) + if not shield: + raise ValueError(f"Shield {shield_id} not found") + + self.shield = NeMoGuardrails(self.config, shield.shield_id) + return await self.shield.run(messages) + + +class NeMoGuardrails: + """ + A class that encapsulates NVIDIA's guardrails safety logic. + + Sends messages to the guardrails service and interprets the response to determine + if a safety violation has occurred. + """ + + def __init__( + self, + config: NVIDIASafetyConfig, + model: str, + threshold: float = 0.9, + temperature: float = 1.0, + ): + """ + Initialize a NeMoGuardrails instance with the provided parameters. + + Args: + config (NVIDIASafetyConfig): The safety configuration containing the config ID and guardrails URL. + model (str): The identifier or name of the model to be used for safety checks. + threshold (float, optional): The threshold for flagging violations. Defaults to 0.9. + temperature (float, optional): The temperature setting for the underlying model. Must be greater than 0. Defaults to 1.0. + + Raises: + ValueError: If temperature is less than or equal to 0. + AssertionError: If config_id is not provided in the configuration. + """ + self.config_id = config.config_id + self.model = model + assert self.config_id is not None, "Must provide config id" + if temperature <= 0: + raise ValueError("Temperature must be greater than 0") + + self.temperature = temperature + self.threshold = threshold + self.guardrails_service_url = config.guardrails_service_url + + async def run(self, messages: List[Message]) -> RunShieldResponse: + """ + Queries the /v1/guardrails/checks endpoint of the NeMo guardrails deployed API. + + Args: + messages (List[Message]): A list of Message objects to be checked for safety violations. + + Returns: + RunShieldResponse: If the response indicates a violation ("blocked" status), returns a + RunShieldResponse with a SafetyViolation; otherwise, returns a RunShieldResponse with violation set to None. + + Raises: + requests.HTTPError: If the POST request fails. + """ + headers = { + "Accept": "application/json", + } + request_data = { + "model": self.model, + "messages": convert_pydantic_to_json_value(messages), + "temperature": self.temperature, + "top_p": 1, + "frequency_penalty": 0, + "presence_penalty": 0, + "max_tokens": 160, + "stream": False, + "guardrails": { + "config_id": self.config_id, + }, + } + response = requests.post( + url=f"{self.guardrails_service_url}/v1/guardrail/checks", headers=headers, json=request_data + ) + response.raise_for_status() + if "Content-Type" in response.headers and response.headers["Content-Type"].startswith("application/json"): + response_json = response.json() + if response_json["status"] == "blocked": + user_message = "Sorry I cannot do this." + metadata = response_json["rails_status"] + + return RunShieldResponse( + violation=SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, + ) + ) + return RunShieldResponse(violation=None) diff --git a/llama_stack/scripts/__init__.py b/llama_stack/scripts/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/scripts/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/llama_stack/scripts/run_tests.sh b/llama_stack/scripts/run_tests.sh
deleted file mode 100644
index 49229d1b1..000000000
--- a/llama_stack/scripts/run_tests.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
-
-set -euo pipefail
-set -x
-
-stack_dir=$(dirname $(dirname $THIS_DIR))
-PYTHONPATH=$stack_dir pytest -p no:warnings --asyncio-mode auto --tb=short
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
index e9748721a..0c788ce86 100644
--- a/llama_stack/templates/nvidia/build.yaml
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -1,13 +1,13 @@
 version: '2'
 distribution_spec:
-  description: Use NVIDIA NIM for running LLM inference
+  description: Use NVIDIA NIM for running LLM inference and safety
   providers:
     inference:
     - remote::nvidia
     vector_io:
     - inline::faiss
     safety:
-    - inline::llama-guard
+    - remote::nvidia
     agents:
     - inline::meta-reference
     telemetry:
@@ -15,16 +15,9 @@ distribution_spec:
     eval:
     - inline::meta-reference
     datasetio:
-    - remote::huggingface
     - inline::localfs
     scoring:
     - inline::basic
-    - inline::llm-as-judge
-    - inline::braintrust
     tool_runtime:
-    - remote::brave-search
-    - remote::tavily-search
-    - inline::code-interpreter
     - inline::rag-runtime
-    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
index cc5e96333..308c0e2a6 100644
--- a/llama_stack/templates/nvidia/nvidia.py
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -6,9 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import Provider, ToolGroupInput
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
@@ -16,19 +17,13 @@ def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::nvidia"],
         "vector_io": ["inline::faiss"],
-        "safety": ["inline::llama-guard"],
+        "safety": ["remote::nvidia"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
         "eval": ["inline::meta-reference"],
-        "datasetio": ["remote::huggingface", "inline::localfs"],
-        "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
-        "tool_runtime": [
-            "remote::brave-search",
-            "remote::tavily-search",
-            "inline::code-interpreter",
-            "inline::rag-runtime",
-            "remote::model-context-protocol",
-        ],
+        "datasetio": ["inline::localfs"],
+        "scoring": ["inline::basic"],
+        "tool_runtime": ["inline::rag-runtime"],
     }
 
     inference_provider = Provider(
@@ -36,30 +31,35 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="remote::nvidia",
         config=NVIDIAConfig.sample_run_config(),
     )
+    safety_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NVIDIASafetyConfig.sample_run_config(),
+    )
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="nvidia",
+    )
+    safety_model = ModelInput(
model_id="${env.SAFETY_MODEL}", + provider_id="nvidia", + ) available_models = { "nvidia": MODEL_ENTRIES, } default_tool_groups = [ - ToolGroupInput( - toolgroup_id="builtin::websearch", - provider_id="tavily-search", - ), ToolGroupInput( toolgroup_id="builtin::rag", provider_id="rag-runtime", ), - ToolGroupInput( - toolgroup_id="builtin::code_interpreter", - provider_id="code-interpreter", - ), ] default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", distro_type="remote_hosted", - description="Use NVIDIA NIM for running LLM inference", + description="Use NVIDIA NIM for running LLM inference and safety", container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, @@ -72,15 +72,34 @@ def get_distribution_template() -> DistributionTemplate: default_models=default_models, default_tool_groups=default_tool_groups, ), + "run-with-safety.yaml": RunConfigSettings( + provider_overrides={ + "inference": [ + inference_provider, + safety_provider, + ] + }, + default_models=[inference_model, safety_model], + default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")], + default_tool_groups=default_tool_groups, + ), }, run_config_env_vars={ - "LLAMASTACK_PORT": ( - "5001", - "Port for the Llama Stack distribution server", - ), "NVIDIA_API_KEY": ( "", "NVIDIA API Key", ), + "GUARDRAILS_SERVICE_URL": ( + "http://0.0.0.0:7331", + "URL for the NeMo Guardrails Service", + ), + "INFERENCE_MODEL": ( + "Llama3.1-8B-Instruct", + "Inference model", + ), + "SAFETY_MODEL": ( + "meta/llama-3.1-8b-instruct", + "Name of the model to use for safety", + ), }, ) diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml new file mode 100644 index 000000000..04da1bcda --- /dev/null +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -0,0 +1,101 @@ +version: '2' +image_name: nvidia +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: nvidia + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:} + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} + config_id: self-check + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db + safety: + - provider_id: nvidia + provider_type: remote::nvidia + config: + guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331} + config_id: self-check + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: ${env.OTEL_SERVICE_NAME:llama-stack} + sinks: ${env.TELEMETRY_SINKS:console,sqlite} + sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/nvidia/trace_store.db} + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db + datasetio: + - 
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/localfs_datasetio.db
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+metadata_store:
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: nvidia
+  model_type: llm
+- metadata: {}
+  model_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+  model_type: llm
+shields:
+- shield_id: ${env.SAFETY_MODEL}
+  provider_id: nvidia
+vector_dbs: []
+datasets: []
+scoring_fns: []
+benchmarks: []
+tool_groups:
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
+server:
+  port: 8321
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
index 213e22cb2..3abdd82a7 100644
--- a/llama_stack/templates/nvidia/run.yaml
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -26,10 +26,11 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
   safety:
-  - provider_id: llama-guard
-    provider_type: inline::llama-guard
+  - provider_id: nvidia
+    provider_type: remote::nvidia
     config:
-      excluded_categories: []
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:http://localhost:7331}
+      config_id: self-check
   agents:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -54,13 +55,6 @@ providers:
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/meta_reference_eval.db
   datasetio:
-  - provider_id: huggingface
-    provider_type: remote::huggingface
-    config:
-      kvstore:
-        type: sqlite
-        namespace: null
-        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
@@ -72,33 +66,10 @@ providers:
   - provider_id: basic
     provider_type: inline::basic
     config: {}
-  - provider_id: llm-as-judge
-    provider_type: inline::llm-as-judge
-    config: {}
-  - provider_id: braintrust
-    provider_type: inline::braintrust
-    config:
-      openai_api_key: ${env.OPENAI_API_KEY:}
   tool_runtime:
-  - provider_id: brave-search
-    provider_type: remote::brave-search
-    config:
-      api_key: ${env.BRAVE_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: tavily-search
-    provider_type: remote::tavily-search
-    config:
-      api_key: ${env.TAVILY_SEARCH_API_KEY:}
-      max_results: 3
-  - provider_id: code-interpreter
-    provider_type: inline::code-interpreter
-    config: {}
   - provider_id: rag-runtime
     provider_type: inline::rag-runtime
     config: {}
-  - provider_id: model-context-protocol
-    provider_type: remote::model-context-protocol
-    config: {}
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
@@ -227,11 +198,7 @@ datasets: []
 scoring_fns: []
 benchmarks: []
 tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
 - toolgroup_id: builtin::rag
   provider_id: rag-runtime
-- toolgroup_id: builtin::code_interpreter
-  provider_id: code-interpreter
 server:
   port: 8321
diff --git a/pyproject.toml b/pyproject.toml
index 4a5befbd0..a006d69f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -269,6 +269,7 @@ exclude = [
     "^llama_stack/providers/remote/inference/together/",
     "^llama_stack/providers/remote/inference/vllm/",
     "^llama_stack/providers/remote/safety/bedrock/",
"^llama_stack/providers/remote/safety/nvidia/", "^llama_stack/providers/remote/safety/sample/", "^llama_stack/providers/remote/tool_runtime/bing_search/", "^llama_stack/providers/remote/tool_runtime/brave_search/", diff --git a/llama_stack/scripts/distro_codegen.py b/scripts/distro_codegen.py old mode 100644 new mode 100755 similarity index 98% rename from llama_stack/scripts/distro_codegen.py rename to scripts/distro_codegen.py index 92c82983e..e19ba8cb9 --- a/llama_stack/scripts/distro_codegen.py +++ b/scripts/distro_codegen.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -20,7 +21,7 @@ from llama_stack.distribution.build import ( get_provider_dependencies, ) -REPO_ROOT = Path(__file__).parent.parent.parent +REPO_ROOT = Path(__file__).parent.parent class ChangedPathTracker: diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py old mode 100644 new mode 100755 index 668146901..ac4053339 --- a/scripts/gen-changelog.py +++ b/scripts/gen-changelog.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # diff --git a/llama_stack/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py old mode 100644 new mode 100755 similarity index 92% rename from llama_stack/scripts/generate_prompt_format.py rename to scripts/generate_prompt_format.py index 338b23f3e..72b057992 --- a/llama_stack/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -18,7 +19,7 @@ import fire from llama_stack.models.llama.sku_list import resolve_model from llama_stack.providers.inline.inference.meta_reference.config import MetaReferenceInferenceConfig -from llama_stack.providers.inline.inference.meta_reference.generation import Llama +from llama_stack.providers.inline.inference.meta_reference.llama3.generation import Llama3 THIS_DIR = Path(__file__).parent.resolve() @@ -41,7 +42,7 @@ def run_main( llama_model = resolve_model(model_id) if not llama_model: raise ValueError(f"Model {model_id} not found") - generator = Llama.build( + generator = Llama3.build( config=config, model_id=model_id, llama_model=llama_model, diff --git a/llama_stack/scripts/run_client_sdk_tests.py b/scripts/run_client_sdk_tests.py old mode 100644 new mode 100755 similarity index 91% rename from llama_stack/scripts/run_client_sdk_tests.py rename to scripts/run_client_sdk_tests.py index e70d187aa..b93316c4f --- a/llama_stack/scripts/run_client_sdk_tests.py +++ b/scripts/run_client_sdk_tests.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. # @@ -15,8 +16,7 @@ Script for running api on AsyncLlamaStackAsLibraryClient with templates Assuming directory structure: - llama-stack - - llama_stack - - scripts + - scripts - tests - api @@ -25,10 +25,10 @@ Example command: cd llama-stack EXPORT TOGETHER_API_KEY=<..> EXPORT FIREWORKS_API_KEY=<..> -python llama_stack/scripts/run_client_sdk_tests.py --templates together fireworks --report +./scripts/run_client_sdk_tests.py --templates together fireworks --report """ -REPO_ROOT = Path(__file__).parent.parent.parent +REPO_ROOT = Path(__file__).parent.parent CLIENT_SDK_TESTS_RELATIVE_PATH = "tests/api/"