diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2a9f4b6f7..6b98cad90 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -52,6 +52,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -97,6 +109,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -128,6 +152,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -159,6 +195,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -183,6 +231,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -219,6 +279,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -255,6 +327,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -286,6 +370,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -317,6 +413,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -362,6 +470,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -410,6 +530,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -438,6 +570,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -462,6 +606,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -492,6 +648,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -532,6 +700,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -570,6 +750,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -608,6 +800,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -648,6 +852,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -679,6 +895,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -719,6 +947,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -773,6 +1013,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -826,6 +1078,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -863,6 +1127,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -884,6 +1160,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -921,6 +1209,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -942,6 +1242,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -979,6 +1291,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1016,6 +1340,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1046,6 +1382,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1084,6 +1432,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1124,6 +1484,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1154,6 +1526,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1175,6 +1559,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1205,6 +1601,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1242,6 +1650,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1279,6 +1699,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1309,6 +1741,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1337,6 +1781,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1373,6 +1829,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1422,6 +1890,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1443,6 +1923,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1473,6 +1965,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1487,6 +1991,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1511,6 +2027,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1542,6 +2070,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1580,6 +2120,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1609,6 +2161,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1647,6 +2211,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1685,6 +2261,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1697,6 +2285,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1728,6 +2328,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1740,6 +2352,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1771,6 +2395,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1802,6 +2438,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1821,6 +2469,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1852,6 +2512,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1873,6 +2545,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1894,6 +2578,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1932,6 +2628,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1944,6 +2652,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1975,6 +2695,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -1994,6 +2726,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2025,6 +2769,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2037,6 +2793,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2068,6 +2836,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2098,6 +2878,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2117,6 +2909,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2141,6 +2945,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2172,6 +2988,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2203,6 +3031,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2234,6 +3074,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2265,6 +3117,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2296,6 +3160,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2332,6 +3208,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2391,6 +3279,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2431,6 +3331,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2455,6 +3367,18 @@
                 "responses": {
                     "200": {
                         "description": "OK"
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2486,6 +3410,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2517,6 +3453,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2548,6 +3496,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2579,6 +3539,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2610,6 +3582,18 @@
                                 }
                             }
                         }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
                     }
                 },
                 "tags": [
@@ -2623,6 +3607,35 @@
     "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
     "components": {
         "schemas": {
+            "Error": {
+                "type": "object",
+                "properties": {
+                    "status": {
+                        "type": "integer",
+                        "description": "HTTP status code"
+                    },
+                    "title": {
+                        "type": "string",
+                        "description": "Error title, a short summary of the error which is invariant for an error type"
+                    },
+                    "detail": {
+                        "type": "string",
+                        "description": "Error detail, a longer human-readable description of the error"
+                    },
+                    "instance": {
+                        "type": "string",
+                        "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "status",
+                    "title",
+                    "detail"
+                ],
+                "title": "Error",
+                "description": "Error response from the API. Roughly follows RFC 7807."
+            },
             "AppendRowsRequest": {
                 "type": "object",
                 "properties": {
@@ -8741,7 +9754,68 @@
                 "title": "VersionInfo"
             }
         },
-        "responses": {}
+        "responses": {
+            "BadRequest400": {
+                "description": "The request was invalid or malformed",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 400,
+                            "title": "Bad Request",
+                            "detail": "The request was invalid or malformed"
+                        }
+                    }
+                }
+            },
+            "TooManyRequests429": {
+                "description": "The client has sent too many requests in a given amount of time",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 429,
+                            "title": "Too Many Requests",
+                            "detail": "You have exceeded the rate limit. Please try again later."
+                        }
+                    }
+                }
+            },
+            "InternalServerError500": {
+                "description": "The server encountered an unexpected error",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 500,
+                            "title": "Internal Server Error",
+                            "detail": "An unexpected error occurred. Our team has been notified."
+                        }
+                    }
+                }
+            },
+            "DefaultError": {
+                "description": "An unexpected error occurred",
+                "content": {
+                    "application/json": {
+                        "schema": {
+                            "$ref": "#/components/schemas/Error"
+                        },
+                        "example": {
+                            "status": 0,
+                            "title": "Error",
+                            "detail": "An unexpected error occurred"
+                        }
+                    }
+                }
+            }
+        }
     },
     "security": [
         {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a2329e47a..13f7edc4b 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -19,6 +19,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PaginatedRowsResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - DatasetIO
       description: ''
@@ -47,6 +57,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - DatasetIO
       description: ''
@@ -66,6 +86,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchChatCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - BatchInference (Coming Soon)
       description: ''
@@ -85,6 +115,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/BatchCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - BatchInference (Coming Soon)
       description: ''
@@ -100,6 +140,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -124,6 +174,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -149,6 +209,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/CompletionResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -169,6 +239,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentCreateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -188,6 +268,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentSessionCreateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -217,6 +307,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -246,6 +346,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListBucketResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: List all buckets.
@@ -263,6 +373,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileUploadResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -279,6 +399,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -297,6 +427,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Session'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -322,6 +462,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -345,6 +495,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -371,6 +531,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/FileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -401,6 +571,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EmbeddingsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inference
       description: >-
@@ -421,6 +601,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -445,6 +635,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/AgentStepResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -478,6 +678,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Turn'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: ''
@@ -508,6 +718,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Benchmark'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -528,6 +748,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Dataset'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -541,6 +771,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -561,6 +801,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Model'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -574,6 +824,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -594,6 +854,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/ScoringFn'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -614,6 +884,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/Shield'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -632,6 +912,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Span'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -655,6 +945,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QuerySpanTreeResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -679,6 +979,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Tool'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: ''
@@ -697,6 +1007,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ToolGroup'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: ''
@@ -710,6 +1030,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: Unregister a tool group
@@ -728,6 +1058,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Trace'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -748,6 +1088,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -768,6 +1118,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/PostTrainingJobStatusResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -786,6 +1146,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListPostTrainingJobsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -801,6 +1171,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/FileUploadResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -822,6 +1202,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/FileResponse'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: >-
@@ -852,6 +1242,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/VectorDB'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -865,6 +1265,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -883,6 +1293,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/HealthInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -892,6 +1312,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: >-
@@ -908,6 +1338,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorIO
       description: ''
@@ -927,6 +1367,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ToolInvocationResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: Run a tool with the given arguments
@@ -948,6 +1398,16 @@ paths:
                 oneOf:
                   - $ref: '#/components/schemas/JobStatus'
                   - type: 'null'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -966,6 +1426,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -989,6 +1459,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/EvaluateResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -1012,6 +1492,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListBenchmarksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -1020,6 +1510,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Benchmarks
       description: ''
@@ -1039,6 +1539,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListDatasetsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -1047,6 +1557,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Datasets
       description: ''
@@ -1066,6 +1586,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListFileResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Files (Coming Soon)
       description: List all files in a bucket.
@@ -1085,6 +1615,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListModelsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -1097,6 +1637,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Model'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Models
       description: ''
@@ -1116,6 +1666,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListProvidersResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1129,6 +1689,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListRoutesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1142,6 +1712,16 @@ paths:
             application/jsonl:
               schema:
                 $ref: '#/components/schemas/ToolDef'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: ''
@@ -1165,6 +1745,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListScoringFunctionsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -1173,6 +1763,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ScoringFunctions
       description: ''
@@ -1192,6 +1792,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListShieldsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -1204,6 +1814,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Shield'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Shields
       description: ''
@@ -1223,6 +1843,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListToolGroupsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: List tool groups with optional provider
@@ -1231,6 +1861,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: Register a tool group
@@ -1250,6 +1890,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListToolsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolGroups
       description: List tools with optional tool group
@@ -1268,6 +1918,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ListVectorDBsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -1280,6 +1940,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/VectorDB'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorDBs
       description: ''
@@ -1295,6 +1965,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1314,6 +1994,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -1333,6 +2023,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/RAGQueryResult'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - ToolRuntime
       description: >-
@@ -1353,6 +2053,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QueryChunksResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - VectorIO
       description: ''
@@ -1372,6 +2082,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QuerySpansResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1391,6 +2111,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/QueryTracesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1415,6 +2145,16 @@ paths:
             text/event-stream:
               schema:
                 $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Agents
       description: >-
@@ -1457,6 +2197,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/Job'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Eval
       description: ''
@@ -1481,6 +2231,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/RunShieldResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Safety
       description: ''
@@ -1496,6 +2256,16 @@ paths:
       responses:
         '200':
           description: OK
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Telemetry
       description: ''
@@ -1515,6 +2285,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Scoring
       description: ''
@@ -1534,6 +2314,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/ScoreBatchResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Scoring
       description: ''
@@ -1553,6 +2343,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/PostTrainingJob'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - PostTraining (Coming Soon)
       description: ''
@@ -1572,6 +2372,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/SyntheticDataGenerationResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - SyntheticDataGeneration (Coming Soon)
       description: ''
@@ -1591,6 +2401,16 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/VersionInfo'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
       tags:
         - Inspect
       description: ''
@@ -1599,6 +2419,34 @@ jsonSchemaDialect: >-
   https://json-schema.org/draft/2020-12/schema
 components:
   schemas:
+    Error:
+      type: object
+      properties:
+        status:
+          type: integer
+          description: HTTP status code
+        title:
+          type: string
+          description: >-
+            Error title, a short summary of the error which is invariant for an error
+            type
+        detail:
+          type: string
+          description: >-
+            Error detail, a longer human-readable description of the error
+        instance:
+          type: string
+          description: >-
+            (Optional) A URL which can be used to retrieve more information about
+            the specific occurrence of the error
+      additionalProperties: false
+      required:
+        - status
+        - title
+        - detail
+      title: Error
+      description: >-
+        Error response from the API. Roughly follows RFC 7807.
     AppendRowsRequest:
       type: object
       properties:
@@ -5626,7 +6474,51 @@ components:
       required:
         - version
       title: VersionInfo
-  responses: {}
+  responses:
+    BadRequest400:
+      description: The request was invalid or malformed
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 400
+            title: Bad Request
+            detail: The request was invalid or malformed
+    TooManyRequests429:
+      description: >-
+        The client has sent too many requests in a given amount of time
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 429
+            title: Too Many Requests
+            detail: >-
+              You have exceeded the rate limit. Please try again later.
+    InternalServerError500:
+      description: >-
+        The server encountered an unexpected error
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 500
+            title: Internal Server Error
+            detail: >-
+              An unexpected error occurred. Our team has been notified.
+    DefaultError:
+      description: An unexpected error occurred
+      content:
+        application/json:
+          schema:
+            $ref: '#/components/schemas/Error'
+          example:
+            status: 0
+            title: Error
+            detail: An unexpected error occurred
 security:
   - Default: []
 tags:
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index dcbee7d2f..a2553f905 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -55,6 +55,7 @@ def main(output_dir: str):
                 a set of endpoints and their corresponding interfaces that are tailored to
                 best leverage Llama Models.""",
             ),
+            include_standard_error_responses=True,
         ),
     )
 
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 4220cfc05..91f32e6c8 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -10,6 +10,7 @@ import typing
 from dataclasses import make_dataclass
 from typing import Any, Dict, Set, Union
 
+from llama_stack.apis.datatypes import Error
 from llama_stack.strong_typing.core import JsonType
 from llama_stack.strong_typing.docstring import Docstring, parse_type
 from llama_stack.strong_typing.inspection import (
@@ -434,6 +435,75 @@ class Generator:
         )
         self.schema_builder = SchemaBuilder(schema_generator)
         self.responses = {}
+        
+        # Create standard error responses
+        self._create_standard_error_responses()
+
+    def _create_standard_error_responses(self) -> None:
+        """
+        Registers the shared 400, 429, 500 and default error responses in self.responses, keyed by the
+        names operations reference via ResponseRef (emitted under components.responses in the OpenAPI document).
+        """
+        # Look up the Error model's schema once; all four responses below share it
+        error_schema = self.schema_builder.classdef_to_ref(Error)
+
+        # The keys below are the names operations use to reference these responses (see ResponseRef usage)
+        self.responses["BadRequest400"] = Response(
+            description="The request was invalid or malformed",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 400,
+                        "title": "Bad Request",
+                        "detail": "The request was invalid or malformed",
+                    }
+                )
+            }
+        )
+
+        self.responses["TooManyRequests429"] = Response(
+            description="The client has sent too many requests in a given amount of time",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 429,
+                        "title": "Too Many Requests",
+                        "detail": "You have exceeded the rate limit. Please try again later.",
+                    }
+                )
+            }
+        )
+
+        self.responses["InternalServerError500"] = Response(
+            description="The server encountered an unexpected error",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 500,
+                        "title": "Internal Server Error",
+                        "detail": "An unexpected error occurred. Our team has been notified.",
+                    }
+                )
+            }
+        )
+
+        # Catch-all for errors not covered above; its example status 0 is not a real HTTP code (presumably a placeholder)
+        self.responses["DefaultError"] = Response(
+            description="An unexpected error occurred",
+            content={
+                "application/json": MediaType(
+                    schema=error_schema,
+                    example={
+                        "status": 0,
+                        "title": "Error",
+                        "detail": "An unexpected error occurred",
+                    }
+                )
+            }
+        )
 
     def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
         # Don't include schema definition in the tag description because for one,
@@ -649,6 +719,18 @@ class Generator:
             responses.update(response_builder.build_response(response_options))
 
         assert len(responses.keys()) > 0, f"No responses found for {op.name}"
+        
+        # Add standard error response references
+        if self.options.include_standard_error_responses:
+            if "400" not in responses:
+                responses["400"] = ResponseRef("BadRequest400")
+            if "429" not in responses:
+                responses["429"] = ResponseRef("TooManyRequests429")
+            if "500" not in responses:
+                responses["500"] = ResponseRef("InternalServerError500")
+            if "default" not in responses:
+                responses["default"] = ResponseRef("DefaultError")
+        
         if op.event_type is not None:
             builder = ContentBuilder(self.schema_builder)
             callbacks = {
diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py
index f80da453b..edc861ad5 100644
--- a/docs/openapi_generator/pyopenapi/options.py
+++ b/docs/openapi_generator/pyopenapi/options.py
@@ -35,6 +35,7 @@ class Options:
     :param error_wrapper: True if errors are encapsulated in an error object wrapper.
     :param property_description_fun: Custom transformation function to apply to class property documentation strings.
     :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types.
+    :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, default) in all operations.
     """
 
     server: Server
@@ -52,6 +53,7 @@ class Options:
     error_wrapper: bool = False
     property_description_fun: Optional[Callable[[type, str, str], str]] = None
     captions: Optional[Dict[str, str]] = None
+    include_standard_error_responses: bool = True
 
     default_captions: ClassVar[Dict[str, str]] = {
         "Operations": "Operations",
diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index 9cb1a402f..20a835201 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co
 llama stack build
 
 > Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
-> Enter the image type you want your Llama Stack to be built as (container or conda): conda
+> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
 
 Llama Stack is composed of several APIs working together. Let's select
 the provider types (implementations) you want to use for these APIs.
@@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i
                        [--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}]
                        config
 
-start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
+Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
 
 positional arguments:
   config                Path to config file to use for the run
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index b183757db..b8d1b1714 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 9aeb7a88b..a49175e22 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md
index 6c791bcb7..ca470f8c2 100644
--- a/docs/source/references/llama_cli_reference/download_models.md
+++ b/docs/source/references/llama_cli_reference/download_models.md
@@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
 **Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
 
 > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+
+## List the downloaded models
+
+List the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md
index a43666963..8a38fc3ae 100644
--- a/docs/source/references/llama_cli_reference/index.md
+++ b/docs/source/references/llama_cli_reference/index.md
@@ -154,6 +154,38 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
 
 > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
 
+## List the downloaded models
+
+List the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
+
 
 ## Understand the models
 The `llama model` command helps you explore the model’s interface.
diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md
index 7dfd8e5fa..98f40bc3c 100644
--- a/docs/zero_to_hero_guide/README.md
+++ b/docs/zero_to_hero_guide/README.md
@@ -73,7 +73,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
    Open a new terminal and install `llama-stack`:
    ```bash
    conda activate ollama
-   pip install llama-stack==0.1.0
+   pip install -U llama-stack
    ```
 
 ---
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
index 6df93052c..842a2b63d 100644
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -5,6 +5,9 @@
 # the root directory of this source tree.
 
 from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel
 
 from llama_stack.schema_utils import json_schema_type
 
@@ -33,3 +36,20 @@ class Api(Enum):
 
     # built-in API
     inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+    """
+    Error response from the API. Roughly follows RFC 7807.
+
+    :param status: HTTP status code
+    :param title: Error title, a short summary of the error which is invariant for an error type
+    :param detail: Error detail, a longer human-readable description of the error
+    :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+    """
+
+    status: int
+    title: str
+    detail: str
+    instance: Optional[str] = None
diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py
index 2b43b8128..3d808a4a4 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@@ -15,7 +15,6 @@ from termcolor import cprint
 
 from llama_stack.distribution.datatypes import BuildConfig, Provider
 from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
 from llama_stack.distribution.utils.exec import run_command, run_with_pty
 from llama_stack.distribution.utils.image_types import ImageType
 from llama_stack.providers.datatypes import Api
@@ -103,8 +102,6 @@ def build_image(
             template_or_config,
             image_name,
             container_base,
-            str(build_file_path),
-            str(BUILDS_BASE_DIR / ImageType.container.value),
             " ".join(normal_deps),
         ]
     elif build_config.image_type == ImageType.conda.value:
diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh
index 31b3e1b21..1eac2ee08 100755
--- a/llama_stack/distribution/build_conda_env.sh
+++ b/llama_stack/distribution/build_conda_env.sh
@@ -52,7 +52,7 @@ ensure_conda_env_python310() {
   local python_version="3.10"
 
   # Check if conda command is available
-  if ! command -v conda &>/dev/null; then
+  if ! is_command_available conda; then
     printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2
     exit 1
   fi
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 08941a538..04d115f70 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
@@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
 # mounting is not supported by docker buildx, so we use COPY instead
 USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
 
-if [ "$#" -lt 6 ]; then
+if [ "$#" -lt 4 ]; then
   # This only works for templates
-  echo "Usage: $0 <template_or_config> <image_name> <container_base> <build_file_path> <host_build_dir> <pip_dependencies> [<special_pip_deps>]" >&2
+  echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2
   exit 1
 fi
 
 set -euo pipefail
 
 template_or_config="$1"
-image_name="$2"
-container_base="$3"
-build_file_path="$4"
-host_build_dir="$5"
-pip_dependencies="$6"
-special_pip_deps="${7:-}"
+shift
+image_name="$1"
+shift
+container_base="$1"
+shift
+pip_dependencies="$1"
+shift
+special_pip_deps="${1:-}"
 
 
 # Define color codes
 RED='\033[0;31m'
-GREEN='\033[0;32m'
 NC='\033[0m' # No Color
 
 CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
@@ -47,8 +48,10 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-}
 
 TEMP_DIR=$(mktemp -d)
 
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+source "$SCRIPT_DIR/common.sh"
+
 add_to_container() {
-  local input
   output_file="$TEMP_DIR/Containerfile"
   if [ -t 0 ]; then
     printf '%s\n' "$1" >>"$output_file"
@@ -58,15 +61,21 @@ add_to_container() {
   fi
 }
 
+# Check if container command is available
+if ! is_command_available "$CONTAINER_BINARY"; then  # quoted so the binary name is never word-split or globbed
+  printf "${RED}Error: %s command not found. Is %s installed and in your PATH?${NC}\n" "$CONTAINER_BINARY" "$CONTAINER_BINARY" >&2
+  exit 1
+fi
+
 # Update and install UBI9 components if UBI9 base image is used
 if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
   add_to_container << EOF
 FROM $container_base
 WORKDIR /app
 
-RUN microdnf -y update && microdnf install -y iputils net-tools wget \
+RUN dnf -y update && dnf install -y iputils net-tools wget \
     vim-minimal python3.11 python3.11-pip python3.11-wheel \
-    python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all
+    python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
 
 ENV UV_SYSTEM_PYTHON=1
 RUN pip install uv
@@ -150,12 +159,12 @@ EOF
     add_to_container << EOF
 RUN uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
   --index-strategy unsafe-best-match \
-  llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION
+  llama-stack==$TEST_PYPI_VERSION
 
 EOF
   else
     if [ -n "$PYPI_VERSION" ]; then
-      SPEC_VERSION="llama-stack==${PYPI_VERSION} llama-models==${PYPI_VERSION} llama-stack-client==${PYPI_VERSION}"
+      SPEC_VERSION="llama-stack==${PYPI_VERSION}"
     else
       SPEC_VERSION="llama-stack"
     fi
@@ -165,6 +174,11 @@ EOF
   fi
 fi
 
+# remove uv after installation
+  add_to_container << EOF
+RUN pip uninstall -y uv
+EOF
+
 # if template_or_config ends with .yaml, it is not a template and we should not use the --template flag
 if [[ "$template_or_config" != *.yaml ]]; then
   add_to_container << EOF
@@ -185,26 +199,31 @@ RUN mkdir -p /.llama /.cache
 RUN chmod -R g+rw /app /.llama /.cache
 EOF
 
-printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
-cat $TEMP_DIR/Containerfile
+printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
+cat "$TEMP_DIR"/Containerfile
 printf "\n"
 
-mounts=""
+# Start building the CLI arguments
+CLI_ARGS=()
+
+# Read CONTAINER_OPTS and put it in an array
+read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
+
 if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
   if [ -n "$LLAMA_STACK_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
   fi
   if [ -n "$LLAMA_MODELS_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount")
   fi
   if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
-    mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount"
+    CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
   fi
 fi
 
-if command -v selinuxenabled &>/dev/null && selinuxenabled; then
+if is_command_available selinuxenabled && selinuxenabled; then
   # Disable SELinux labels -- we don't want to relabel the llama-stack source dir
-  CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
+  CLI_ARGS+=("--security-opt" "label=disable")
 fi
 
 # Set version tag based on PyPI version
@@ -225,11 +244,11 @@ image_tag="$image_name:$version_tag"
 # Detect platform architecture
 ARCH=$(uname -m)
 if [ -n "$BUILD_PLATFORM" ]; then
-  PLATFORM="--platform $BUILD_PLATFORM"
+  CLI_ARGS+=("--platform" "$BUILD_PLATFORM")  # flag and value must be separate array elements
 elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
-  PLATFORM="--platform linux/arm64"
+  CLI_ARGS+=("--platform" "linux/arm64")
 elif [ "$ARCH" = "x86_64" ]; then
-  PLATFORM="--platform linux/amd64"
+  CLI_ARGS+=("--platform" "linux/amd64")
 else
   echo "Unsupported architecture: $ARCH"
   exit 1
@@ -238,8 +257,13 @@ fi
 echo "PWD: $(pwd)"
 echo "Containerfile: $TEMP_DIR/Containerfile"
 set -x
-$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \
-  -f "$TEMP_DIR/Containerfile" "." $mounts --progress=plain
+
+$CONTAINER_BINARY build \
+  "${CLI_ARGS[@]}" \
+  -t "$image_tag" \
+  -f "$TEMP_DIR/Containerfile" \
+  "." \
+  --progress=plain
 
 # clean up tmp/configs
 set +x
diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py
index 48157b018..a01f7f1f3 100644
--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import json
 from typing import Any, Dict, List, Optional
 
 from tqdm import tqdm
@@ -116,7 +117,7 @@ class MetaReferenceEvalImpl(
         generations = []
         for i, x in tqdm(enumerate(input_rows)):
             assert ColumnName.chat_completion_input.value in x, "Invalid input row"
-            input_messages = eval(str(x[ColumnName.chat_completion_input.value]))
+            input_messages = json.loads(x[ColumnName.chat_completion_input.value])
             input_messages = [UserMessage(**x) for x in input_messages]
 
             # NOTE: only single-turn agent generation is supported. Create a new session for each input row
@@ -158,7 +159,7 @@ class MetaReferenceEvalImpl(
         generations = []
         for x in tqdm(input_rows):
             if ColumnName.completion_input.value in x:
-                input_content = eval(str(x[ColumnName.completion_input.value]))
+                input_content = json.loads(x[ColumnName.completion_input.value])
                 response = await self.inference_api.completion(
                     model=candidate.model,
                     content=input_content,
@@ -166,9 +167,8 @@ class MetaReferenceEvalImpl(
                 )
                 generations.append({ColumnName.generated_answer.value: response.completion_message.content})
             elif ColumnName.chat_completion_input.value in x:
-                chat_completion_input_str = str(x[ColumnName.chat_completion_input.value])
-                input_messages = eval(chat_completion_input_str)
-                input_messages = [UserMessage(**x) for x in input_messages]
+                chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
+                input_messages = [UserMessage(**x) for x in chat_completion_input_json]
                 messages = []
                 if candidate.system_message:
                     messages.append(candidate.system_message)
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
index 884977803..6b607f1c7 100644
--- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
+++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
@@ -10,16 +10,19 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import json
 from typing import Any, Mapping
 
 from llama_stack.providers.utils.common.data_schema_validator import ColumnName
 
 
-def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]:
+def llama_stack_instruct_to_torchtune_instruct(
+    sample: Mapping[str, Any],
+) -> Mapping[str, Any]:
     assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, (
         "Invalid input row"
     )
-    input_messages = eval(str(sample[ColumnName.chat_completion_input.value]))
+    input_messages = json.loads(sample[ColumnName.chat_completion_input.value])
 
     assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message"
     input_message = input_messages[0]
@@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map
 def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]:
     assert ColumnName.dialog.value in sample, "Invalid input row"
     role_map = {"user": "human", "assistant": "gpt"}
-    dialog = eval(str(sample[ColumnName.dialog.value]))
+    dialog = json.loads(sample[ColumnName.dialog.value])
 
     assert len(dialog) > 1, "dialog must have at least 2 messagse"
     roles = []
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 967a3e44d..8ec23cd90 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         tool_config: Optional[ToolConfig] = None,
     ) -> AsyncGenerator:
         model = await self.model_store.get_model(model_id)
+        # This is to be consistent with OpenAI API and support vLLM <= v0.6.3
+        # References:
+        #   * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+        #   * https://github.com/vllm-project/vllm/pull/10000
+        if not tools and tool_config is not None:
+            tool_config.tool_choice = ToolChoice.none
         request = ChatCompletionRequest(
             model=model.provider_resource_id,
             messages=messages,
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 60556a6f3..87438fb6d 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -29,12 +29,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) for instructions on downloading the models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index 2b117120c..e8dfaaf3c 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -31,12 +31,31 @@ The following environment variables can be configured:
 
 ## Prerequisite: Downloading Models
 
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) for instructions on downloading the models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
 
 ```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B           Llama3.2-11B-Vision-Instruct  Llama3.2-1B-Instruct  Llama3.2-90B-Vision-Instruct  Llama-Guard-3-8B
-Llama3.1-8B-Instruct  Llama3.2-1B                   Llama3.2-3B-Instruct  Llama-Guard-3-1B              Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model                                   ┃ Size     ┃ Modified Time       ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
 ```
 
 ## Running the Distribution
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 9a10622dd..bdd2d8a51 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -4,20 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import json
-from typing import Dict, List
 from uuid import uuid4
 
 import pytest
 from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.client_tool import ClientTool
+from llama_stack_client.lib.agents.client_tool import client_tool
 from llama_stack_client.lib.agents.event_logger import EventLogger
-from llama_stack_client.types import ToolResponseMessage
 from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument
 from llama_stack_client.types.memory_insert_params import Document
-from llama_stack_client.types.shared.completion_message import CompletionMessage
 from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
-from llama_stack_client.types.tool_def_param import Parameter
 
 from llama_stack.apis.agents.agents import (
     AgentConfig as Server__AgentConfig,
@@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import (
 )
 
 
-class TestClientTool(ClientTool):
-    """Tool to give boiling point of a liquid
-    Returns the correct value for polyjuice in Celcius and Fahrenheit
-    and returns -1 for other liquids
+@client_tool
+def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
     """
+    Returns the boiling point of a liquid in Celcius or Fahrenheit
 
-    def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
-        assert len(messages) == 1, "Expected single message"
-
-        message = messages[0]
-
-        tool_call = message.tool_calls[0]
-
-        try:
-            response = self.run_impl(**tool_call.arguments)
-            response_str = json.dumps(response, ensure_ascii=False)
-        except Exception as e:
-            response_str = f"Error when running tool: {e}"
-
-        message = ToolResponseMessage(
-            role="tool",
-            call_id=tool_call.call_id,
-            tool_name=tool_call.tool_name,
-            content=response_str,
-        )
-        return message
-
-    def get_name(self) -> str:
-        return "get_boiling_point"
-
-    def get_description(self) -> str:
-        return "Get the boiling point of imaginary liquids (eg. polyjuice)"
-
-    def get_params_definition(self) -> Dict[str, Parameter]:
-        return {
-            "liquid_name": Parameter(
-                name="liquid_name",
-                parameter_type="string",
-                description="The name of the liquid",
-                required=True,
-            ),
-            "celcius": Parameter(
-                name="celcius",
-                parameter_type="boolean",
-                description="Whether to return the boiling point in Celcius",
-                required=False,
-            ),
-        }
-
-    def run_impl(self, liquid_name: str, celcius: bool = True) -> int:
-        if liquid_name.lower() == "polyjuice":
-            if celcius:
-                return -100
-            else:
-                return -212
+    :param liquid_name: The name of the liquid
+    :param celcius: Whether to return the boiling point in Celcius
+    :return: The boiling point of the liquid in Celcius or Fahrenheit
+    """
+    if liquid_name.lower() == "polyjuice":
+        if celcius:
+            return -100
         else:
-            return -1
+            return -212
+    else:
+        return -1
 
 
 @pytest.fixture(scope="session")
@@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config):
 
 
 def test_custom_tool(llama_stack_client, agent_config):
-    client_tool = TestClientTool()
+    client_tool = get_boiling_point
     agent_config = {
         **agent_config,
         "toolgroups": ["builtin::websearch"],
@@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config):
 
 def test_tool_choice(llama_stack_client, agent_config):
     def run_agent(tool_choice):
-        client_tool = TestClientTool()
+        client_tool = get_boiling_point
 
         test_agent_config = {
             **agent_config,
@@ -505,7 +459,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config):
 
 
 def test_create_turn_response(llama_stack_client, agent_config):
-    client_tool = TestClientTool()
+    client_tool = get_boiling_point
     agent_config = {
         **agent_config,
         "input_shields": [],
diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py
index 69d35d05d..075f927f7 100644
--- a/tests/client-sdk/inference/test_embedding.py
+++ b/tests/client-sdk/inference/test_embedding.py
@@ -76,6 +76,25 @@ DUMMY_IMAGE_URL = ImageContentItem(
 )
 DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
 SUPPORTED_PROVIDERS = {"remote::nvidia"}
+MODELS_SUPPORTING_MEDIA = set()  # empty set, matching the sibling constants (a bare {} is a dict)
+MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
+MODELS_REQUIRING_TASK_TYPE = {
+    "nvidia/llama-3.2-nv-embedqa-1b-v2",
+    "nvidia/nv-embedqa-e5-v5",
+    "nvidia/nv-embedqa-mistral-7b-v2",
+    "snowflake/arctic-embed-l",
+}
+MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
+
+
+def default_task_type(model_id):
+    """
+    Some models require a task type parameter. This provides a default value for
+    testing those models.
+    """
+    if model_id in MODELS_REQUIRING_TASK_TYPE:
+        return {"task_type": "query"}
+    return {}
 
 
 @pytest.mark.parametrize(
@@ -92,7 +111,9 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"}
 def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+    )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
@@ -110,11 +131,14 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents, infere
         "list[url,string,base64,text]",
     ],
 )
-@pytest.mark.xfail(reason="Media is not supported")
 def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+    if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
+        pytest.xfail(f"{embedding_model_id} doesn't support media")
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+    )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
@@ -145,7 +169,10 @@ def test_embedding_truncation(
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+        model_id=embedding_model_id,
+        contents=contents,
+        text_truncation=text_truncation,
+        **default_task_type(embedding_model_id),
     )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == 1
@@ -178,26 +205,36 @@ def test_embedding_truncation_error(
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError):
         llama_stack_client.inference.embeddings(
-            model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
+            model_id=embedding_model_id,
+            contents=[DUMMY_LONG_TEXT],
+            text_truncation=text_truncation,
+            **default_task_type(embedding_model_id),
         )
 
 
-@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
 def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
-    base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+    if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
+        pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
+    base_response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
+    )
     test_response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+        model_id=embedding_model_id,
+        contents=[DUMMY_STRING],
+        **default_task_type(embedding_model_id),
+        output_dimension=32,
     )
     assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
     assert len(test_response.embeddings[0]) == 32
 
 
-@pytest.mark.xfail(reason="Only valid for model supporting task type")
 def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+    if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
+        pytest.xfail(f"{embedding_model_id} doesn't support task_type")
     query_embedding = llama_stack_client.inference.embeddings(
         model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
     )
@@ -220,7 +257,10 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
     if inference_provider_type not in SUPPORTED_PROVIDERS:
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     response = llama_stack_client.inference.embeddings(
-        model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+        model_id=embedding_model_id,
+        contents=[DUMMY_STRING],
+        text_truncation=text_truncation,
+        **default_task_type(embedding_model_id),
     )
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == 1
@@ -245,5 +285,8 @@ def test_embedding_text_truncation_error(
         pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
     with pytest.raises(BadRequestError):
         llama_stack_client.inference.embeddings(
-            model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+            model_id=embedding_model_id,
+            contents=[DUMMY_STRING],
+            text_truncation=text_truncation,
+            **default_task_type(embedding_model_id),
         )