diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2a9f4b6f7..6b98cad90 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -52,6 +52,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -97,6 +109,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -128,6 +152,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -159,6 +195,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -183,6 +231,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -219,6 +279,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -255,6 +327,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -286,6 +370,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -317,6 +413,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -362,6 +470,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -410,6 +530,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -438,6 +570,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -462,6 +606,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -492,6 +648,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -532,6 +700,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -570,6 +750,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -608,6 +800,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -648,6 +852,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -679,6 +895,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -719,6 +947,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -773,6 +1013,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -826,6 +1078,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -863,6 +1127,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -884,6 +1160,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -921,6 +1209,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -942,6 +1242,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -979,6 +1291,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1016,6 +1340,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1046,6 +1382,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1084,6 +1432,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1124,6 +1484,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1154,6 +1526,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1175,6 +1559,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1205,6 +1601,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1242,6 +1650,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1279,6 +1699,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1309,6 +1741,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1337,6 +1781,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1373,6 +1829,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1422,6 +1890,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1443,6 +1923,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1473,6 +1965,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1487,6 +1991,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1511,6 +2027,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1542,6 +2070,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1580,6 +2120,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1609,6 +2161,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1647,6 +2211,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1685,6 +2261,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1697,6 +2285,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1728,6 +2328,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1740,6 +2352,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1771,6 +2395,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1802,6 +2438,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1821,6 +2469,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1852,6 +2512,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1873,6 +2545,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1894,6 +2578,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1932,6 +2628,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1944,6 +2652,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1975,6 +2695,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -1994,6 +2726,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2025,6 +2769,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2037,6 +2793,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2068,6 +2836,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2098,6 +2878,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2117,6 +2909,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2141,6 +2945,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2172,6 +2988,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2203,6 +3031,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2234,6 +3074,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2265,6 +3117,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2296,6 +3160,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2332,6 +3208,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2391,6 +3279,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2431,6 +3331,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2455,6 +3367,18 @@
"responses": {
"200": {
"description": "OK"
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2486,6 +3410,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2517,6 +3453,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2548,6 +3496,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2579,6 +3539,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2610,6 +3582,18 @@
}
}
}
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
}
},
"tags": [
@@ -2623,6 +3607,35 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
+ "Error": {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "integer",
+ "description": "HTTP status code"
+ },
+ "title": {
+ "type": "string",
+ "description": "Error title, a short summary of the error which is invariant for an error type"
+ },
+ "detail": {
+ "type": "string",
+ "description": "Error detail, a longer human-readable description of the error"
+ },
+ "instance": {
+ "type": "string",
+ "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "status",
+ "title",
+ "detail"
+ ],
+ "title": "Error",
+ "description": "Error response from the API. Roughly follows RFC 7807."
+ },
"AppendRowsRequest": {
"type": "object",
"properties": {
@@ -8741,7 +9754,68 @@
"title": "VersionInfo"
}
},
- "responses": {}
+ "responses": {
+ "BadRequest400": {
+ "description": "The request was invalid or malformed",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The request was invalid or malformed"
+ }
+ }
+ }
+ },
+ "TooManyRequests429": {
+ "description": "The client has sent too many requests in a given amount of time",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You have exceeded the rate limit. Please try again later."
+ }
+ }
+ }
+ },
+ "InternalServerError500": {
+ "description": "The server encountered an unexpected error",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred. Our team has been notified."
+ }
+ }
+ }
+ },
+ "DefaultError": {
+ "description": "An unexpected error occurred",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Error"
+ },
+ "example": {
+ "status": 0,
+ "title": "Error",
+ "detail": "An unexpected error occurred"
+ }
+ }
+ }
+ }
+ }
},
"security": [
{
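(Reviewer note: a minimal sketch of client-side handling for the error envelope the hunks above introduce. The `requests` usage and the generic `call` helper are illustrative assumptions, not part of the llama-stack SDK; only the `Error` schema fields — required `status`, `title`, `detail`, optional `instance` — come from this diff.)

```python
# Sketch only: how a client might surface the shared Error envelope.
# Assumes the `requests` library; the real llama-stack client may differ.
import requests

def call(base_url: str, path: str, payload: dict) -> dict:
    """POST to an endpoint and raise a readable error per the new spec."""
    resp = requests.post(f"{base_url}{path}", json=payload)
    if resp.ok:
        return resp.json()
    # 400, 429, 500, and the default response all reference the same
    # Error schema: status, title, detail are required; instance is optional.
    err = resp.json()
    msg = f"{err['status']} {err['title']}: {err['detail']}"
    if "instance" in err:
        msg += f" (see {err['instance']})"
    raise RuntimeError(msg)
```
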
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a2329e47a..13f7edc4b 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -19,6 +19,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/PaginatedRowsResult'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
description: ''
@@ -47,6 +57,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
description: ''
@@ -66,6 +86,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
description: ''
@@ -85,6 +115,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- BatchInference (Coming Soon)
description: ''
@@ -100,6 +140,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -124,6 +174,16 @@ paths:
text/event-stream:
schema:
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
@@ -149,6 +209,16 @@ paths:
text/event-stream:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
@@ -169,6 +239,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/AgentCreateResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -188,6 +268,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/AgentSessionCreateResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -217,6 +307,16 @@ paths:
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -246,6 +346,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListBucketResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: List all buckets.
@@ -263,6 +373,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/FileUploadResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: >-
@@ -279,6 +399,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -297,6 +427,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Session'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -322,6 +462,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -345,6 +495,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: >-
@@ -371,6 +531,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: >-
@@ -401,6 +571,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
@@ -421,6 +601,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Eval
description: ''
@@ -445,6 +635,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/AgentStepResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -478,6 +678,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Turn'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: ''
@@ -508,6 +718,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/Benchmark'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
description: ''
@@ -528,6 +748,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/Dataset'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Datasets
description: ''
@@ -541,6 +771,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Datasets
description: ''
@@ -561,6 +801,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/Model'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Models
description: ''
@@ -574,6 +824,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Models
description: ''
@@ -594,6 +854,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/ScoringFn'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: ''
@@ -614,6 +884,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/Shield'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Shields
description: ''
@@ -632,6 +912,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Span'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -655,6 +945,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/QuerySpanTreeResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -679,6 +979,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Tool'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: ''
@@ -697,6 +1007,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ToolGroup'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: ''
@@ -710,6 +1030,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: Unregister a tool group
@@ -728,6 +1058,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Trace'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -748,6 +1088,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -768,6 +1118,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -786,6 +1146,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListPostTrainingJobsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -801,6 +1171,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/FileUploadResponse'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: >-
@@ -822,6 +1202,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/FileResponse'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: >-
@@ -852,6 +1242,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/VectorDB'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorDBs
description: ''
@@ -865,6 +1265,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorDBs
description: ''
@@ -883,6 +1293,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/HealthInfo'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inspect
description: ''
@@ -892,6 +1312,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolRuntime
description: >-
@@ -908,6 +1338,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorIO
description: ''
@@ -927,6 +1367,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ToolInvocationResult'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolRuntime
description: Run a tool with the given arguments
@@ -948,6 +1398,16 @@ paths:
oneOf:
- $ref: '#/components/schemas/JobStatus'
- type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Eval
description: ''
@@ -966,6 +1426,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Eval
description: ''
@@ -989,6 +1459,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/EvaluateResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Eval
description: ''
@@ -1012,6 +1492,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListBenchmarksResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
description: ''
@@ -1020,6 +1510,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Benchmarks
description: ''
@@ -1039,6 +1539,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListDatasetsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Datasets
description: ''
@@ -1047,6 +1557,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Datasets
description: ''
@@ -1066,6 +1586,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListFileResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Files (Coming Soon)
description: List all files in a bucket.
@@ -1085,6 +1615,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Models
description: ''
@@ -1097,6 +1637,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Model'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Models
description: ''
@@ -1116,6 +1666,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListProvidersResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inspect
description: ''
@@ -1129,6 +1689,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListRoutesResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inspect
description: ''
@@ -1142,6 +1712,16 @@ paths:
application/jsonl:
schema:
$ref: '#/components/schemas/ToolDef'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolRuntime
description: ''
@@ -1165,6 +1745,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListScoringFunctionsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: ''
@@ -1173,6 +1763,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ScoringFunctions
description: ''
@@ -1192,6 +1792,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListShieldsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Shields
description: ''
@@ -1204,6 +1814,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Shield'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Shields
description: ''
@@ -1223,6 +1843,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListToolGroupsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: List tool groups with optional provider
@@ -1231,6 +1861,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: Register a tool group
@@ -1250,6 +1890,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListToolsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolGroups
description: List tools with optional tool group
@@ -1268,6 +1918,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ListVectorDBsResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorDBs
description: ''
@@ -1280,6 +1940,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/VectorDB'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorDBs
description: ''
@@ -1295,6 +1965,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -1314,6 +1994,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJob'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -1333,6 +2023,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/RAGQueryResult'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- ToolRuntime
description: >-
@@ -1353,6 +2053,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/QueryChunksResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- VectorIO
description: ''
@@ -1372,6 +2082,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/QuerySpansResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -1391,6 +2111,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/QueryTracesResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -1415,6 +2145,16 @@ paths:
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Agents
description: >-
@@ -1457,6 +2197,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/Job'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Eval
description: ''
@@ -1481,6 +2231,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/RunShieldResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Safety
description: ''
@@ -1496,6 +2256,16 @@ paths:
responses:
'200':
description: OK
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
@@ -1515,6 +2285,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ScoreResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: ''
@@ -1534,6 +2314,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/ScoreBatchResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Scoring
description: ''
@@ -1553,6 +2343,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/PostTrainingJob'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
@@ -1572,6 +2372,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/SyntheticDataGenerationResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- SyntheticDataGeneration (Coming Soon)
description: ''
@@ -1591,6 +2401,16 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/VersionInfo'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
tags:
- Inspect
description: ''
@@ -1599,6 +2419,34 @@ jsonSchemaDialect: >-
https://json-schema.org/draft/2020-12/schema
components:
schemas:
+ Error:
+ type: object
+ properties:
+ status:
+ type: integer
+ description: HTTP status code
+ title:
+ type: string
+ description: >-
+ Error title, a short summary of the error which is invariant for an error
+ type
+ detail:
+ type: string
+ description: >-
+ Error detail, a longer human-readable description of the error
+ instance:
+ type: string
+ description: >-
+ (Optional) A URL which can be used to retrieve more information about
+ the specific occurrence of the error
+ additionalProperties: false
+ required:
+ - status
+ - title
+ - detail
+ title: Error
+ description: >-
+ Error response from the API. Roughly follows RFC 7807.
AppendRowsRequest:
type: object
properties:
@@ -5626,7 +6474,51 @@ components:
required:
- version
title: VersionInfo
- responses: {}
+ responses:
+ BadRequest400:
+ description: The request was invalid or malformed
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ status: 400
+ title: Bad Request
+ detail: The request was invalid or malformed
+ TooManyRequests429:
+ description: >-
+ The client has sent too many requests in a given amount of time
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ status: 429
+ title: Too Many Requests
+ detail: >-
+ You have exceeded the rate limit. Please try again later.
+ InternalServerError500:
+ description: >-
+ The server encountered an unexpected error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ status: 500
+ title: Internal Server Error
+ detail: >-
+ An unexpected error occurred. Our team has been notified.
+ DefaultError:
+ description: An unexpected error occurred
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ status: 0
+ title: Error
+ detail: An unexpected error occurred
security:
- Default: []
tags:
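
With these standard `components.responses` entries in place, every operation advertises a machine-readable error body. A rough client-side sketch (illustrative only, not part of this change; the `requests` usage and error handling are assumptions) showing how a caller could rely on the `status`/`title`/`detail` fields:

```python
# Hypothetical consumer of the standardized Error payload (roughly RFC 7807).
import requests

def call_llama_stack(url: str) -> dict:
    resp = requests.get(url, timeout=30)
    if resp.ok:
        return resp.json()
    # 400/429/500/default responses now share the Error schema:
    # {"status": ..., "title": ..., "detail": ..., "instance": optional}
    err = resp.json()
    raise RuntimeError(
        f"{err.get('status', resp.status_code)} {err.get('title', 'Error')}: "
        f"{err.get('detail', 'no detail provided')}"
    )
```
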
diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb
index 8ae6fed24..21436327e 100644
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -1145,6 +1145,7 @@
}
],
"source": [
+ "# NBVAL_SKIP\n",
"from pydantic import BaseModel\n",
"\n",
"\n",
@@ -2885,7 +2886,6 @@
}
],
"source": [
- "# NBVAL_SKIP\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
@@ -4326,7 +4326,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "toolchain",
+ "display_name": "master",
"language": "python",
"name": "python3"
},
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index dcbee7d2f..a2553f905 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -55,6 +55,7 @@ def main(output_dir: str):
a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models.""",
),
+ include_standard_error_responses=True,
),
)
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 4220cfc05..91f32e6c8 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -10,6 +10,7 @@ import typing
from dataclasses import make_dataclass
from typing import Any, Dict, Set, Union
+from llama_stack.apis.datatypes import Error
from llama_stack.strong_typing.core import JsonType
from llama_stack.strong_typing.docstring import Docstring, parse_type
from llama_stack.strong_typing.inspection import (
@@ -434,6 +435,75 @@ class Generator:
)
self.schema_builder = SchemaBuilder(schema_generator)
self.responses = {}
+
+ # Create standard error responses
+ self._create_standard_error_responses()
+
+ def _create_standard_error_responses(self) -> None:
+ """
+ Creates standard error responses that can be reused across operations.
+ These will be added to the components.responses section of the OpenAPI document.
+ """
+ # Get the Error schema
+ error_schema = self.schema_builder.classdef_to_ref(Error)
+
+ # Create standard error responses
+ self.responses["BadRequest400"] = Response(
+ description="The request was invalid or malformed",
+ content={
+ "application/json": MediaType(
+ schema=error_schema,
+ example={
+ "status": 400,
+ "title": "Bad Request",
+ "detail": "The request was invalid or malformed",
+ }
+ )
+ }
+ )
+
+ self.responses["TooManyRequests429"] = Response(
+ description="The client has sent too many requests in a given amount of time",
+ content={
+ "application/json": MediaType(
+ schema=error_schema,
+ example={
+ "status": 429,
+ "title": "Too Many Requests",
+ "detail": "You have exceeded the rate limit. Please try again later.",
+ }
+ )
+ }
+ )
+
+ self.responses["InternalServerError500"] = Response(
+ description="The server encountered an unexpected error",
+ content={
+ "application/json": MediaType(
+ schema=error_schema,
+ example={
+ "status": 500,
+ "title": "Internal Server Error",
+ "detail": "An unexpected error occurred. Our team has been notified.",
+ }
+ )
+ }
+ )
+
+ # Add a default error response for any unhandled error cases
+ self.responses["DefaultError"] = Response(
+ description="An unexpected error occurred",
+ content={
+ "application/json": MediaType(
+ schema=error_schema,
+ example={
+ "status": 0,
+ "title": "Error",
+ "detail": "An unexpected error occurred",
+ }
+ )
+ }
+ )
def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
# Don't include schema definition in the tag description because for one,
@@ -649,6 +719,18 @@ class Generator:
responses.update(response_builder.build_response(response_options))
assert len(responses.keys()) > 0, f"No responses found for {op.name}"
+
+ # Add standard error response references
+ if self.options.include_standard_error_responses:
+ if "400" not in responses:
+ responses["400"] = ResponseRef("BadRequest400")
+ if "429" not in responses:
+ responses["429"] = ResponseRef("TooManyRequests429")
+ if "500" not in responses:
+ responses["500"] = ResponseRef("InternalServerError500")
+ if "default" not in responses:
+ responses["default"] = ResponseRef("DefaultError")
+
if op.event_type is not None:
builder = ContentBuilder(self.schema_builder)
callbacks = {
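
The injection rule above only fills in status codes an operation has not already declared. A simplified sketch of that behavior, using plain dicts in place of the generator's `Response`/`ResponseRef` types:

```python
# Standard error refs are added with setdefault semantics: an operation's
# own 400/429/500/default response always wins over the shared one.
STANDARD_ERRORS = {
    "400": {"$ref": "#/components/responses/BadRequest400"},
    "429": {"$ref": "#/components/responses/TooManyRequests429"},
    "500": {"$ref": "#/components/responses/InternalServerError500"},
    "default": {"$ref": "#/components/responses/DefaultError"},
}

def add_standard_error_refs(responses: dict) -> dict:
    for code, ref in STANDARD_ERRORS.items():
        responses.setdefault(code, dict(ref))
    return responses

ops = add_standard_error_refs({"200": {"description": "OK"}, "400": {"description": "custom"}})
assert ops["400"] == {"description": "custom"}   # pre-existing response kept
assert ops["429"]["$ref"].endswith("TooManyRequests429")
```
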
diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py
index f80da453b..edc861ad5 100644
--- a/docs/openapi_generator/pyopenapi/options.py
+++ b/docs/openapi_generator/pyopenapi/options.py
@@ -35,6 +35,7 @@ class Options:
:param error_wrapper: True if errors are encapsulated in an error object wrapper.
:param property_description_fun: Custom transformation function to apply to class property documentation strings.
:param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types.
+ :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, and a default) in all operations.
"""
server: Server
@@ -52,6 +53,7 @@ class Options:
error_wrapper: bool = False
property_description_fun: Optional[Callable[[type, str, str], str]] = None
captions: Optional[Dict[str, str]] = None
+ include_standard_error_responses: bool = True
default_captions: ClassVar[Dict[str, str]] = {
"Operations": "Operations",
diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index 9cb1a402f..20a835201 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co
llama stack build
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
-> Enter the image type you want your Llama Stack to be built as (container or conda): conda
+> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
Llama Stack is composed of several APIs working together. Let's select
the provider types (implementations) you want to use for these APIs.
@@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i
[--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}]
config
-start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
+Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
positional arguments:
config Path to config file to use for the run
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index b183757db..b8d1b1714 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B
-Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
```
## Running the Distribution
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 9aeb7a88b..a49175e22 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -41,12 +41,31 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B
-Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
```
## Running the Distribution
diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md
index ecef20d55..eb0dcf392 100644
--- a/docs/source/getting_started/index.md
+++ b/docs/source/getting_started/index.md
@@ -38,7 +38,7 @@ The API is **exactly identical** for both clients.
:::{dropdown} Starting up the Llama Stack server
The Llama Stack server can be configured flexibly so you can mix-and-match various providers for its individual API components -- beyond Inference, these include Vector IO, Agents, Telemetry, Evals, Post Training, etc.
-To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image.
+To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image. If you'd like to build your own image or customize the configurations, please check out [this guide](../references/index.md).
Let's set up some environment variables that we will use in the rest of the guide.
```bash
diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md
index 6c791bcb7..ca470f8c2 100644
--- a/docs/source/references/llama_cli_reference/download_models.md
+++ b/docs/source/references/llama_cli_reference/download_models.md
@@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
> **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+
+## List the downloaded models
+
+You can list the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md
index a43666963..8a38fc3ae 100644
--- a/docs/source/references/llama_cli_reference/index.md
+++ b/docs/source/references/llama_cli_reference/index.md
@@ -154,6 +154,38 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern
> **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored.
+## List the downloaded models
+
+You can list the downloaded models with the following command:
+```
+llama model list --downloaded
+```
+
+You should see a table like this:
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
+```
+
## Understand the models
The `llama model` command helps you explore the model’s interface.
diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py
index 6df93052c..842a2b63d 100644
--- a/llama_stack/apis/datatypes.py
+++ b/llama_stack/apis/datatypes.py
@@ -5,6 +5,9 @@
# the root directory of this source tree.
from enum import Enum
+from typing import Optional
+
+from pydantic import BaseModel
from llama_stack.schema_utils import json_schema_type
@@ -33,3 +36,20 @@ class Api(Enum):
# built-in API
inspect = "inspect"
+
+
+@json_schema_type
+class Error(BaseModel):
+ """
+ Error response from the API. Roughly follows RFC 7807.
+
+ :param status: HTTP status code
+ :param title: Error title, a short summary of the error which is invariant for an error type
+ :param detail: Error detail, a longer human-readable description of the error
+ :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error
+ """
+
+ status: int
+ title: str
+ detail: str
+ instance: Optional[str] = None
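
A quick illustration of the new `Error` datatype (the import path is the one introduced by this diff; output formatting is approximate):

```python
from llama_stack.apis.datatypes import Error

err = Error(
    status=429,
    title="Too Many Requests",
    detail="You have exceeded the rate limit. Please try again later.",
    # instance is optional; it can point at the specific occurrence
)
print(err.model_dump_json())
# e.g. {"status":429,"title":"Too Many Requests","detail":"...","instance":null}
```
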
diff --git a/llama_stack/cli/model/prompt_format.py b/llama_stack/cli/model/prompt_format.py
index ea9596ba5..516c67634 100644
--- a/llama_stack/cli/model/prompt_format.py
+++ b/llama_stack/cli/model/prompt_format.py
@@ -9,6 +9,7 @@ import textwrap
from io import StringIO
from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
from llama_stack.models.llama.datatypes import CoreModelId, ModelFamily, is_multimodal, model_family
@@ -48,7 +49,26 @@ class ModelPromptFormat(Subcommand):
supported_model_ids = [
m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2}
]
- model_str = "\n".join([m.value for m in supported_model_ids])
+
+ model_list = [m.value for m in supported_model_ids]
+ model_str = "\n".join(model_list)
+
+ if args.list:
+ headers = ["Model(s)"]
+ rows = []
+ for m in model_list:
+ rows.append(
+ [
+ m,
+ ]
+ )
+ print_table(
+ rows,
+ headers,
+ separate_rows=True,
+ )
+ return
+
try:
model_id = CoreModelId(args.model_name)
except ValueError:
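
The new `--list` branch above reuses `print_table` to render the supported model IDs as a one-column table and returns early, instead of burying the list inside an error message. A hedged sketch of the same pattern in isolation (`print_table` is the CLI helper imported above; the data is made up):

```python
from llama_stack.cli.table import print_table

model_list = ["Llama3.1-8B-Instruct", "Llama3.2-3B-Instruct"]  # illustrative
rows = [[m] for m in model_list]
print_table(rows, ["Model(s)"], separate_rows=True)
```
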
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index 89db368db..baa7d2e32 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -141,7 +141,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
completer=WordCompleter(available_providers),
complete_while_typing=True,
validator=Validator.from_callable(
- lambda x: x in available_providers,
+ lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847
error_message="Invalid provider, use <TAB> to see options",
),
)
diff --git a/llama_stack/cli/tests/test_stack_config.py b/llama_stack/cli/tests/test_stack_config.py
index 2b7b2b210..333f86e38 100644
--- a/llama_stack/cli/tests/test_stack_config.py
+++ b/llama_stack/cli/tests/test_stack_config.py
@@ -112,7 +112,7 @@ def test_parse_and_maybe_upgrade_config_old_format(old_config):
inference_providers = result.providers["inference"]
assert len(inference_providers) == 2
- assert set(x.provider_id for x in inference_providers) == {
+ assert {x.provider_id for x in inference_providers} == {
"remote::ollama-00",
"meta-reference-01",
}
diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py
index 2b43b8128..3d808a4a4 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@@ -15,7 +15,6 @@ from termcolor import cprint
from llama_stack.distribution.datatypes import BuildConfig, Provider
from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR
from llama_stack.distribution.utils.exec import run_command, run_with_pty
from llama_stack.distribution.utils.image_types import ImageType
from llama_stack.providers.datatypes import Api
@@ -103,8 +102,6 @@ def build_image(
template_or_config,
image_name,
container_base,
- str(build_file_path),
- str(BUILDS_BASE_DIR / ImageType.container.value),
" ".join(normal_deps),
]
elif build_config.image_type == ImageType.conda.value:
diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh
index 31b3e1b21..1eac2ee08 100755
--- a/llama_stack/distribution/build_conda_env.sh
+++ b/llama_stack/distribution/build_conda_env.sh
@@ -52,7 +52,7 @@ ensure_conda_env_python310() {
local python_version="3.10"
# Check if conda command is available
- if ! command -v conda &>/dev/null; then
+ if ! is_command_available conda; then
printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2
exit 1
fi
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 08941a538..68f8a0863 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
@@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500}
# mounting is not supported by docker buildx, so we use COPY instead
USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-}
-if [ "$#" -lt 6 ]; then
+if [ "$#" -lt 4 ]; then
# This only works for templates
- echo "Usage: $0 []" >&2
+ echo "Usage: $0 []" >&2
exit 1
fi
set -euo pipefail
template_or_config="$1"
-image_name="$2"
-container_base="$3"
-build_file_path="$4"
-host_build_dir="$5"
-pip_dependencies="$6"
-special_pip_deps="${7:-}"
+shift
+image_name="$1"
+shift
+container_base="$1"
+shift
+pip_dependencies="$1"
+shift
+special_pip_deps="${1:-}"
# Define color codes
RED='\033[0;31m'
-GREEN='\033[0;32m'
NC='\033[0m' # No Color
CONTAINER_BINARY=${CONTAINER_BINARY:-docker}
@@ -47,8 +48,10 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-}
TEMP_DIR=$(mktemp -d)
+SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+source "$SCRIPT_DIR/common.sh"
+
add_to_container() {
- local input
output_file="$TEMP_DIR/Containerfile"
if [ -t 0 ]; then
printf '%s\n' "$1" >>"$output_file"
@@ -58,15 +61,21 @@ add_to_container() {
fi
}
+# Check if container command is available
+if ! is_command_available $CONTAINER_BINARY; then
+ printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
+ exit 1
+fi
+
# Update and install UBI9 components if UBI9 base image is used
if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
add_to_container << EOF
FROM $container_base
WORKDIR /app
-RUN microdnf -y update && microdnf install -y iputils net-tools wget \
+RUN dnf -y update && dnf install -y iputils net-tools wget \
vim-minimal python3.11 python3.11-pip python3.11-wheel \
- python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all
+ python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
ENV UV_SYSTEM_PYTHON=1
RUN pip install uv
@@ -165,6 +174,11 @@ EOF
fi
fi
+# remove uv after installation
+ add_to_container << EOF
+RUN pip uninstall -y uv
+EOF
+
# if template_or_config ends with .yaml, it is not a template and we should not use the --template flag
if [[ "$template_or_config" != *.yaml ]]; then
add_to_container << EOF
@@ -185,26 +199,31 @@ RUN mkdir -p /.llama /.cache
RUN chmod -R g+rw /app /.llama /.cache
EOF
-printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n"
-cat $TEMP_DIR/Containerfile
+printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
+cat "$TEMP_DIR"/Containerfile
printf "\n"
-mounts=""
+# Start building the CLI arguments
+CLI_ARGS=()
+
+# Read CONTAINER_OPTS and put it in an array
+read -ra CLI_ARGS <<< "$CONTAINER_OPTS"
+
if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then
if [ -n "$LLAMA_STACK_DIR" ]; then
- mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount"
+ CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount")
fi
if [ -n "$LLAMA_MODELS_DIR" ]; then
- mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount"
+ CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount")
fi
if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
- mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount"
+ CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount")
fi
fi
-if command -v selinuxenabled &>/dev/null && selinuxenabled; then
+if is_command_available selinuxenabled && selinuxenabled; then
# Disable SELinux labels -- we don't want to relabel the llama-stack source dir
- CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable"
+ CLI_ARGS+=("--security-opt" "label=disable")
fi
# Set version tag based on PyPI version
@@ -225,11 +244,11 @@ image_tag="$image_name:$version_tag"
# Detect platform architecture
ARCH=$(uname -m)
if [ -n "$BUILD_PLATFORM" ]; then
- PLATFORM="--platform $BUILD_PLATFORM"
+ CLI_ARGS+=("--platform $BUILD_PLATFORM")
elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
- PLATFORM="--platform linux/arm64"
+ CLI_ARGS+=("--platform" "linux/arm64")
elif [ "$ARCH" = "x86_64" ]; then
- PLATFORM="--platform linux/amd64"
+ CLI_ARGS+=("--platform" "linux/amd64")
else
echo "Unsupported architecture: $ARCH"
exit 1
@@ -238,8 +257,13 @@ fi
echo "PWD: $(pwd)"
echo "Containerfile: $TEMP_DIR/Containerfile"
set -x
-$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \
- -f "$TEMP_DIR/Containerfile" "." $mounts --progress=plain
+
+$CONTAINER_BINARY build \
+ "${CLI_ARGS[@]}" \
+ -t "$image_tag" \
+ -f "$TEMP_DIR/Containerfile" \
+ "." \
+ --progress=plain
# clean up tmp/configs
set +x
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py
index 384e2c3c8..308081415 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import Api, ProviderSpec
def stack_apis() -> List[Api]:
- return [v for v in Api]
+ return list(Api)
class AutoRoutedApiInfo(BaseModel):
@@ -55,7 +55,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]:
def providable_apis() -> List[Api]:
- routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis())
+ routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
return [api for api in Api if api not in routing_table_apis and api != Api.inspect]
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index 0bc2e774c..69a096e97 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -115,8 +115,8 @@ async def resolve_impls(
- flatmaps, sorts and resolves the providers in dependency order
- for each API, produces either a (local, passthrough or router) implementation
"""
- routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis())
- router_apis = set(x.router_api for x in builtin_automatically_routed_apis())
+ routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()}
+ router_apis = {x.router_api for x in builtin_automatically_routed_apis()}
providers_with_specs = {}
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index c2434e517..80e9ecb7c 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -318,14 +318,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
)
model = await self.get_object_by_identifier("model", embedding_model)
if model is None:
- if embedding_model == "all-MiniLM-L6-v2":
- raise ValueError(
- "Embeddings are now served via Inference providers. "
- "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. "
- "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example."
- )
- else:
- raise ValueError(f"Model {embedding_model} not found")
+ raise ValueError(f"Model {embedding_model} not found")
if model.model_type != ModelType.embedding:
raise ValueError(f"Model {embedding_model} is not an embedding model")
if "embedding_dimension" not in model.metadata:
diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py
index 202c9322f..4a916321d 100644
--- a/llama_stack/distribution/ui/page/playground/rag.py
+++ b/llama_stack/distribution/ui/page/playground/rag.py
@@ -134,7 +134,7 @@ def rag_chat_page():
dict(
name="builtin::rag/knowledge_search",
args={
- "vector_db_ids": [vector_db_id for vector_db_id in selected_vector_dbs],
+ "vector_db_ids": list(selected_vector_dbs),
},
)
],
diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py
index 82bf00e3c..aae6b35d8 100644
--- a/llama_stack/distribution/utils/exec.py
+++ b/llama_stack/distribution/utils/exec.py
@@ -46,7 +46,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
conda_env_info = json.loads(subprocess.check_output(["conda", "info", "--envs", "--json"]).decode())
envs = conda_env_info["envs"]
for envpath in envs:
- if envpath.endswith(env_name):
+ if os.path.basename(envpath) == env_name:
return envpath
return None
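
The `endswith` to `os.path.basename` switch above fixes a real lookup hazard: a conda env whose name is a suffix of another env's name could shadow the intended one. A minimal repro (paths are made up):

```python
import os

envs = ["/opt/conda/envs/other-stack", "/opt/conda/envs/stack"]
env_name = "stack"

# old behavior: "other-stack".endswith("stack") is True, so the wrong env matches first
assert [e for e in envs if e.endswith(env_name)][0] == "/opt/conda/envs/other-stack"

# new behavior: exact basename comparison selects only the intended env
assert [e for e in envs if os.path.basename(e) == env_name] == ["/opt/conda/envs/stack"]
```
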
diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
index 27b1a3502..74a3ae4f0 100644
--- a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
+++ b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
@@ -226,10 +226,9 @@ class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase):
class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801
DEFAULT_PROMPT = textwrap.dedent(
"""
+ You are a helpful assistant. You have access to functions, but you should only use them if they are required.
You are an expert in composing functions. You are given a question and a set of possible functions.
- Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
- If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
- also point it out. You should only return the function call in tools call sections.
+ Based on the question, you may or may not need to make one function/tool call to achieve the purpose.
{{ function_description }}
""".strip("\n")
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 7995f4b31..97d2c12a0 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -611,8 +611,17 @@ class ChatAgent(ShieldRunnerMixin):
if event.stop_reason is not None:
stop_reason = event.stop_reason
span.set_attribute("stop_reason", stop_reason)
- span.set_attribute("input", [m.model_dump_json() for m in input_messages])
- span.set_attribute("output", f"content: {content} tool_calls: {tool_calls}")
+ span.set_attribute(
+ "input",
+ json.dumps([json.loads(m.model_dump_json()) for m in input_messages]),
+ )
+ output_attr = json.dumps(
+ {
+ "content": content,
+ "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls],
+ }
+ )
+ span.set_attribute("output", output_attr)
n_iter += 1
await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter)
@@ -796,10 +805,10 @@ class ChatAgent(ShieldRunnerMixin):
self, toolgroups_for_turn: Optional[List[AgentToolGroup]] = None
) -> Tuple[List[ToolDefinition], Dict[str, str]]:
# Determine which tools to include
- agent_config_toolgroups = set(
- (toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup)
+ agent_config_toolgroups = {
+ toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup
for toolgroup in self.agent_config.toolgroups
- )
+ }
toolgroups_for_turn_set = (
agent_config_toolgroups
if toolgroups_for_turn is None
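
The span-attribute change above matters because a Python list of `model_dump_json()` strings is not itself valid JSON; the attribute is now built by re-parsing each message and serializing the whole structure once. A sketch with a stand-in pydantic model (not the real message types):

```python
import json
from pydantic import BaseModel

class Msg(BaseModel):
    role: str
    content: str

messages = [Msg(role="user", content="hi")]

old_attr = [m.model_dump_json() for m in messages]  # a list of JSON *strings*
new_attr = json.dumps([json.loads(m.model_dump_json()) for m in messages])

assert json.loads(new_attr) == [{"role": "user", "content": "hi"}]
```
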
diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py
index 18d408a31..a01f7f1f3 100644
--- a/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -3,6 +3,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import json
from typing import Any, Dict, List, Optional
from tqdm import tqdm
@@ -86,7 +87,6 @@ class MetaReferenceEvalImpl(
) -> Job:
task_def = self.benchmarks[benchmark_id]
dataset_id = task_def.dataset_id
- candidate = task_config.eval_candidate
scoring_functions = task_def.scoring_functions
dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.eval.value))
@@ -117,7 +117,7 @@ class MetaReferenceEvalImpl(
generations = []
for i, x in tqdm(enumerate(input_rows)):
assert ColumnName.chat_completion_input.value in x, "Invalid input row"
- input_messages = eval(str(x[ColumnName.chat_completion_input.value]))
+ input_messages = json.loads(x[ColumnName.chat_completion_input.value])
input_messages = [UserMessage(**x) for x in input_messages]
# NOTE: only single-turn agent generation is supported. Create a new session for each input row
@@ -159,7 +159,7 @@ class MetaReferenceEvalImpl(
generations = []
for x in tqdm(input_rows):
if ColumnName.completion_input.value in x:
- input_content = eval(str(x[ColumnName.completion_input.value]))
+ input_content = json.loads(x[ColumnName.completion_input.value])
response = await self.inference_api.completion(
model=candidate.model,
content=input_content,
@@ -167,9 +167,8 @@ class MetaReferenceEvalImpl(
)
generations.append({ColumnName.generated_answer.value: response.completion_message.content})
elif ColumnName.chat_completion_input.value in x:
- chat_completion_input_str = str(x[ColumnName.chat_completion_input.value])
- input_messages = eval(chat_completion_input_str)
- input_messages = [UserMessage(**x) for x in input_messages]
+ chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value])
+ input_messages = [UserMessage(**x) for x in chat_completion_input_json]
messages = []
if candidate.system_message:
messages.append(candidate.system_message)
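
The `eval()` to `json.loads()` swaps in this file are a safety fix as much as a cleanup: `eval` executes whatever Python expression a dataset row happens to contain, while `json.loads` only parses data. A contrived illustration:

```python
import json

row = '[{"role": "user", "content": "hello"}]'
assert json.loads(row)[0]["content"] == "hello"

malicious = "__import__('os').getcwd() and []"
# eval(malicious) would run the os call and return []; json.loads refuses:
try:
    json.loads(malicious)
except json.JSONDecodeError:
    pass  # rejected as data, which is what we want for dataset rows
```
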
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 763d9664d..516ac1ad8 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -208,7 +208,6 @@ class MetaReferenceInferenceImpl(
logprobs = []
stop_reason = None
- tokenizer = self.generator.formatter.tokenizer
for token_result in self.generator.completion(request):
tokens.append(token_result.token)
if token_result.text == "<|eot_id|>":
diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
index 658267f7f..91d0445ab 100644
--- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
+++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
@@ -207,7 +207,7 @@ def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage
return parse_message(maybe_json)
except json.JSONDecodeError:
return None
- except ValueError as e:
+ except ValueError:
return None
@@ -352,7 +352,7 @@ class ModelParallelProcessGroup:
if isinstance(obj, TaskResponse):
yield obj.result
- except GeneratorExit as e:
+ except GeneratorExit:
self.request_socket.send(encode_msg(CancelSentinel()))
while True:
obj_json = self.request_socket.send()
diff --git a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py
index 014a26f09..cecb66dd3 100644
--- a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py
+++ b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py
@@ -7,6 +7,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement.
+# This file gets special treatment for now:
+# ruff: noqa: N803
+
import unittest
import torch
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
index 884977803..6b607f1c7 100644
--- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
+++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
@@ -10,16 +10,19 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
+import json
from typing import Any, Mapping
from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]:
+def llama_stack_instruct_to_torchtune_instruct(
+ sample: Mapping[str, Any],
+) -> Mapping[str, Any]:
assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, (
"Invalid input row"
)
- input_messages = eval(str(sample[ColumnName.chat_completion_input.value]))
+ input_messages = json.loads(sample[ColumnName.chat_completion_input.value])
assert len(input_messages) == 1, "llama stack instruct dataset format only supports 1 user message"
input_message = input_messages[0]
@@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map
def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]:
assert ColumnName.dialog.value in sample, "Invalid input row"
role_map = {"user": "human", "assistant": "gpt"}
- dialog = eval(str(sample[ColumnName.dialog.value]))
+ dialog = json.loads(sample[ColumnName.dialog.value])
assert len(dialog) > 1, "dialog must have at least 2 messages"
roles = []
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 41387474f..c88787f18 100644
--- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -264,7 +264,7 @@ class LoraFinetuningSingleDevice:
)
self.adapter_params = get_adapter_params(model)
- self._is_dora = any(["magnitude" in k for k in self.adapter_params.keys()])
+ self._is_dora = any("magnitude" in k for k in self.adapter_params.keys())
set_trainable_params(model, self.adapter_params)
diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index be0f023f3..a48b6b58b 100644
--- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -133,7 +133,7 @@ class BraintrustScoringImpl(
async def shutdown(self) -> None: ...
async def list_scoring_functions(self) -> List[ScoringFn]:
- scoring_fn_defs_list = [x for x in self.supported_fn_defs_registry.values()]
+ scoring_fn_defs_list = list(self.supported_fn_defs_registry.values())
for f in scoring_fn_defs_list:
assert f.identifier.startswith("braintrust"), (
"All braintrust scoring fn must have identifier prefixed with 'braintrust'! "
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 2ca7dd578..db9e176ee 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -198,7 +198,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
tool_config: Optional[ToolConfig] = None,
) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
if tool_prompt_format:
- warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring")
+ warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring", stacklevel=2)
await check_health(self._config) # this raises errors
diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
index 1849fda6d..0582cb816 100644
--- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py
+++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
@@ -106,7 +106,7 @@ async def convert_chat_completion_request(
payload.update(temperature=strategy.temperature)
elif isinstance(strategy, TopKSamplingStrategy):
if strategy.top_k != -1 and strategy.top_k < 1:
- warnings.warn("top_k must be -1 or >= 1")
+ warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=strategy.top_k)
elif isinstance(strategy, GreedySamplingStrategy):
nvext.update(top_k=-1)
@@ -168,7 +168,7 @@ def convert_completion_request(
payload.update(top_p=request.sampling_params.top_p)
elif request.sampling_params.strategy == "top_k":
if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1:
- warnings.warn("top_k must be -1 or >= 1")
+ warnings.warn("top_k must be -1 or >= 1", stacklevel=2)
nvext.update(top_k=request.sampling_params.top_k)
elif request.sampling_params.strategy == "greedy":
nvext.update(top_k=-1)
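
The `stacklevel=2` additions in this file change where the warning is reported from: the caller's line rather than the `warn()` call site inside the adapter, which makes the message actionable. Standard-library behavior, shown in isolation:

```python
import warnings

def helper(top_k: int) -> None:
    if top_k != -1 and top_k < 1:
        # stacklevel=2 -> the reported filename/lineno belong to the caller
        warnings.warn("top_k must be -1 or >= 1", stacklevel=2)

helper(0)  # the warning points at this line, not at warn() inside helper()
```
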
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 967a3e44d..8ec23cd90 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
tool_config: Optional[ToolConfig] = None,
) -> AsyncGenerator:
model = await self.model_store.get_model(model_id)
+ # This is to be consistent with OpenAI API and support vLLM <= v0.6.3
+ # References:
+ # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+ # * https://github.com/vllm-project/vllm/pull/10000
+ if not tools and tool_config is not None:
+ tool_config.tool_choice = ToolChoice.none
request = ChatCompletionRequest(
model=model.provider_resource_id,
messages=messages,
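
The vLLM guard above mirrors OpenAI semantics: when a request carries no tools, `tool_choice` must not remain `auto`. A minimal sketch with stand-in types (the real `ToolConfig`/`ToolChoice` live in llama-stack's datatypes):

```python
from dataclasses import dataclass
from enum import Enum

class ToolChoice(Enum):
    auto = "auto"
    none = "none"

@dataclass
class ToolConfig:
    tool_choice: ToolChoice = ToolChoice.auto

def normalize(tools: list | None, tool_config: ToolConfig | None) -> ToolConfig | None:
    # no tools declared -> force tool_choice to none (vLLM <= v0.6.3 compat)
    if not tools and tool_config is not None:
        tool_config.tool_choice = ToolChoice.none
    return tool_config

cfg = normalize(tools=None, tool_config=ToolConfig())
assert cfg is not None and cfg.tool_choice is ToolChoice.none
```
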
diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py
index ad80b8601..9ce3a972b 100644
--- a/llama_stack/providers/tests/eval/test_eval.py
+++ b/llama_stack/providers/tests/eval/test_eval.py
@@ -39,12 +39,11 @@ class Testeval:
@pytest.mark.asyncio
async def test_eval_evaluate_rows(self, eval_stack, inference_model, judge_model):
- eval_impl, benchmarks_impl, datasetio_impl, datasets_impl, models_impl = (
+ eval_impl, benchmarks_impl, datasetio_impl, datasets_impl = (
eval_stack[Api.eval],
eval_stack[Api.benchmarks],
eval_stack[Api.datasetio],
eval_stack[Api.datasets],
- eval_stack[Api.models],
)
await register_dataset(datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval")
@@ -92,11 +91,10 @@ class Testeval:
@pytest.mark.asyncio
async def test_eval_run_eval(self, eval_stack, inference_model, judge_model):
- eval_impl, benchmarks_impl, datasets_impl, models_impl = (
+ eval_impl, benchmarks_impl, datasets_impl = (
eval_stack[Api.eval],
eval_stack[Api.benchmarks],
eval_stack[Api.datasets],
- eval_stack[Api.models],
)
await register_dataset(datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval")
@@ -131,11 +129,10 @@ class Testeval:
@pytest.mark.asyncio
async def test_eval_run_benchmark_eval(self, eval_stack, inference_model):
- eval_impl, benchmarks_impl, datasets_impl, models_impl = (
+ eval_impl, benchmarks_impl, datasets_impl = (
eval_stack[Api.eval],
eval_stack[Api.benchmarks],
eval_stack[Api.datasets],
- eval_stack[Api.models],
)
response = await datasets_impl.list_datasets()
diff --git a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py
index febd13045..c9a7f69a8 100644
--- a/llama_stack/providers/tests/report.py
+++ b/llama_stack/providers/tests/report.py
@@ -18,54 +18,48 @@ from llama_stack.models.llama.sku_list import all_registered_models
INFERENCE_APIS = ["chat_completion"]
FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"]
SUPPORTED_MODELS = {
- "ollama": set(
- [
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_2_1b_instruct.value,
- CoreModelId.llama3_2_1b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_3_70b_instruct.value,
- CoreModelId.llama_guard_3_8b.value,
- CoreModelId.llama_guard_3_1b.value,
- ]
- ),
- "fireworks": set(
- [
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_2_1b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_3_70b_instruct.value,
- CoreModelId.llama_guard_3_8b.value,
- CoreModelId.llama_guard_3_11b_vision.value,
- ]
- ),
- "together": set(
- [
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_3_70b_instruct.value,
- CoreModelId.llama_guard_3_8b.value,
- CoreModelId.llama_guard_3_11b_vision.value,
- ]
- ),
+ "ollama": {
+ CoreModelId.llama3_1_8b_instruct.value,
+ CoreModelId.llama3_1_8b_instruct.value,
+ CoreModelId.llama3_1_70b_instruct.value,
+ CoreModelId.llama3_1_70b_instruct.value,
+ CoreModelId.llama3_1_405b_instruct.value,
+ CoreModelId.llama3_1_405b_instruct.value,
+ CoreModelId.llama3_2_1b_instruct.value,
+ CoreModelId.llama3_2_1b_instruct.value,
+ CoreModelId.llama3_2_3b_instruct.value,
+ CoreModelId.llama3_2_3b_instruct.value,
+ CoreModelId.llama3_2_11b_vision_instruct.value,
+ CoreModelId.llama3_2_11b_vision_instruct.value,
+ CoreModelId.llama3_2_90b_vision_instruct.value,
+ CoreModelId.llama3_2_90b_vision_instruct.value,
+ CoreModelId.llama3_3_70b_instruct.value,
+ CoreModelId.llama_guard_3_8b.value,
+ CoreModelId.llama_guard_3_1b.value,
+ },
+ "fireworks": {
+ CoreModelId.llama3_1_8b_instruct.value,
+ CoreModelId.llama3_1_70b_instruct.value,
+ CoreModelId.llama3_1_405b_instruct.value,
+ CoreModelId.llama3_2_1b_instruct.value,
+ CoreModelId.llama3_2_3b_instruct.value,
+ CoreModelId.llama3_2_11b_vision_instruct.value,
+ CoreModelId.llama3_2_90b_vision_instruct.value,
+ CoreModelId.llama3_3_70b_instruct.value,
+ CoreModelId.llama_guard_3_8b.value,
+ CoreModelId.llama_guard_3_11b_vision.value,
+ },
+ "together": {
+ CoreModelId.llama3_1_8b_instruct.value,
+ CoreModelId.llama3_1_70b_instruct.value,
+ CoreModelId.llama3_1_405b_instruct.value,
+ CoreModelId.llama3_2_3b_instruct.value,
+ CoreModelId.llama3_2_11b_vision_instruct.value,
+ CoreModelId.llama3_2_90b_vision_instruct.value,
+ CoreModelId.llama3_3_70b_instruct.value,
+ CoreModelId.llama_guard_3_8b.value,
+ CoreModelId.llama_guard_3_11b_vision.value,
+ },
}
diff --git a/llama_stack/providers/tests/scoring/test_scoring.py b/llama_stack/providers/tests/scoring/test_scoring.py
index e98fd8627..d80b105f4 100644
--- a/llama_stack/providers/tests/scoring/test_scoring.py
+++ b/llama_stack/providers/tests/scoring/test_scoring.py
@@ -45,13 +45,11 @@ class TestScoring:
scoring_functions_impl,
datasetio_impl,
datasets_impl,
- models_impl,
) = (
scoring_stack[Api.scoring],
scoring_stack[Api.scoring_functions],
scoring_stack[Api.datasetio],
scoring_stack[Api.datasets],
- scoring_stack[Api.models],
)
scoring_fns_list = await scoring_functions_impl.list_scoring_functions()
provider_id = scoring_fns_list[0].provider_id
@@ -102,13 +100,11 @@ class TestScoring:
scoring_functions_impl,
datasetio_impl,
datasets_impl,
- models_impl,
) = (
scoring_stack[Api.scoring],
scoring_stack[Api.scoring_functions],
scoring_stack[Api.datasetio],
scoring_stack[Api.datasets],
- scoring_stack[Api.models],
)
await register_dataset(datasets_impl, for_rag=True)
response = await datasets_impl.list_datasets()
@@ -163,13 +159,11 @@ class TestScoring:
scoring_functions_impl,
datasetio_impl,
datasets_impl,
- models_impl,
) = (
scoring_stack[Api.scoring],
scoring_stack[Api.scoring_functions],
scoring_stack[Api.datasetio],
scoring_stack[Api.datasets],
- scoring_stack[Api.models],
)
await register_dataset(datasets_impl, for_rag=True)
rows = await datasetio_impl.get_rows_paginated(
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py
index eaf5ad2e1..98c2bfd2e 100644
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -6,7 +6,7 @@
import json
import logging
import warnings
-from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union
+from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union
from openai import AsyncStream
from openai.types.chat import (
@@ -605,7 +605,7 @@ def convert_tool_call(
tool_name=tool_call.function.name,
arguments=json.loads(tool_call.function.arguments),
)
- except Exception as e:
+ except Exception:
return UnparseableToolCall(
call_id=tool_call.id or "",
tool_name=tool_call.function.name or "",
@@ -841,14 +841,13 @@ async def convert_openai_chat_completion_stream(
Convert a stream of OpenAI chat completion chunks into a stream
of ChatCompletionResponseStreamChunk.
"""
-
- # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ...
- def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]:
- yield ChatCompletionResponseEventType.start
- while True:
- yield ChatCompletionResponseEventType.progress
-
- event_type = _event_type_generator()
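+ # emit an explicit start chunk first; every subsequent chunk is a progress event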
+ yield ChatCompletionResponseStreamChunk(
+ event=ChatCompletionResponseEvent(
+ event_type=ChatCompletionResponseEventType.start,
+ delta=TextDelta(text=""),
+ )
+ )
+ event_type = ChatCompletionResponseEventType.progress
stop_reason = None
toolcall_buffer = {}
@@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream(
if choice.delta.content:
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
- event_type=next(event_type),
+ event_type=event_type,
delta=TextDelta(text=choice.delta.content),
logprobs=_convert_openai_logprobs(logprobs),
)
@@ -877,7 +876,9 @@ async def convert_openai_chat_completion_stream(
# it is possible to have parallel tool calls in stream, but
# ChatCompletionResponseEvent only supports one per stream
if len(choice.delta.tool_calls) > 1:
- warnings.warn("multiple tool calls found in a single delta, using the first, ignoring the rest")
+ warnings.warn(
+ "multiple tool calls found in a single delta, using the first, ignoring the rest", stacklevel=2
+ )
if not enable_incremental_tool_calls:
yield ChatCompletionResponseStreamChunk(
@@ -909,7 +910,7 @@ async def convert_openai_chat_completion_stream(
toolcall_buffer["content"] += delta
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
- event_type=next(event_type),
+ event_type=event_type,
delta=ToolCallDelta(
tool_call=delta,
parse_status=ToolCallParseStatus.in_progress,
@@ -920,7 +921,7 @@ async def convert_openai_chat_completion_stream(
else:
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
- event_type=next(event_type),
+ event_type=event_type,
delta=TextDelta(text=choice.delta.content or ""),
logprobs=_convert_openai_logprobs(logprobs),
)
@@ -931,7 +932,7 @@ async def convert_openai_chat_completion_stream(
toolcall_buffer["content"] += delta
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
- event_type=next(event_type),
+ event_type=event_type,
delta=ToolCallDelta(
tool_call=delta,
parse_status=ToolCallParseStatus.in_progress,
diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/llama_stack/providers/utils/kvstore/redis/redis.py
index f5254198b..a390ea866 100644
--- a/llama_stack/providers/utils/kvstore/redis/redis.py
+++ b/llama_stack/providers/utils/kvstore/redis/redis.py
@@ -36,7 +36,7 @@ class RedisKVStoreImpl(KVStore):
value = await self.redis.get(key)
if value is None:
return None
- ttl = await self.redis.ttl(key)
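+ # the TTL result was never used; the lookup is kept but its value is deliberately discarded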
+ await self.redis.ttl(key)
return value
async def delete(self, key: str) -> None:
diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/llama_stack/providers/utils/scoring/aggregation_utils.py
index 35c4ee180..6686e4ade 100644
--- a/llama_stack/providers/utils/scoring/aggregation_utils.py
+++ b/llama_stack/providers/utils/scoring/aggregation_utils.py
@@ -32,7 +32,7 @@ def aggregate_categorical_count(
scoring_results: List[ScoringResultRow],
) -> Dict[str, Any]:
scores = [str(r["score"]) for r in scoring_results]
- unique_scores = sorted(list(set(scores)))
+ unique_scores = sorted(set(scores))
return {"categorical_count": {s: scores.count(s) for s in unique_scores}}
diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/llama_stack/providers/utils/scoring/base_scoring_fn.py
index a741e5baa..d28c57cc1 100644
--- a/llama_stack/providers/utils/scoring/base_scoring_fn.py
+++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py
@@ -66,7 +66,7 @@ class RegisteredBaseScoringFn(BaseScoringFn):
return self.__class__.__name__
def get_supported_scoring_fn_defs(self) -> List[ScoringFn]:
- return [x for x in self.supported_fn_defs_registry.values()]
+ return list(self.supported_fn_defs_registry.values())
def register_scoring_fn_def(self, scoring_fn: ScoringFn) -> None:
if scoring_fn.identifier in self.supported_fn_defs_registry:
diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py
index 924274c42..525ade74d 100644
--- a/llama_stack/providers/utils/telemetry/trace_protocol.py
+++ b/llama_stack/providers/utils/telemetry/trace_protocol.py
@@ -6,6 +6,7 @@
import asyncio
import inspect
+import json
from functools import wraps
from typing import Any, AsyncGenerator, Callable, Type, TypeVar
@@ -17,6 +18,10 @@ T = TypeVar("T")
def serialize_value(value: Any) -> Primitive:
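+ # span attributes only accept primitives, so stringify the JSON-ready structure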
+ return str(_prepare_for_json(value))
+
+
+def _prepare_for_json(value: Any) -> str:
"""Serialize a single value into JSON-compatible format."""
if value is None:
return ""
@@ -25,9 +30,17 @@ def serialize_value(value: Any) -> Primitive:
elif hasattr(value, "_name_"):
return value._name_
elif isinstance(value, BaseModel):
- return value.model_dump_json()
+ return json.loads(value.model_dump_json())
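+ # recurse into containers so nested models and enums are converted as well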
+ elif isinstance(value, (list, tuple, set)):
+ return [_prepare_for_json(item) for item in value]
+ elif isinstance(value, dict):
+ return {str(k): _prepare_for_json(v) for k, v in value.items()}
else:
- return str(value)
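+ # keep anything that already serializes to JSON; fall back to str() otherwise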
+ try:
+ json.dumps(value)
+ return value
+ except Exception:
+ return str(value)
def trace_protocol(cls: Type[T]) -> Type[T]:
@@ -104,7 +117,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]:
result = method(self, *args, **kwargs)
span.set_attribute("output", serialize_value(result))
return result
- except Exception as _e:
+ except Exception as e:
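+ # record the failure on the span before re-raising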
+ span.set_attribute("error", str(e))
raise
if is_async_gen:
diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py
index 76c7283eb..92c82983e 100644
--- a/llama_stack/scripts/distro_codegen.py
+++ b/llama_stack/scripts/distro_codegen.py
@@ -99,7 +99,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[
template = template_func()
normal_deps, special_deps = get_provider_dependencies(template.providers)
# Combine all dependencies in order: normal deps, special deps, server deps
- all_deps = sorted(list(set(normal_deps + SERVER_DEPENDENCIES))) + sorted(list(set(special_deps)))
+ all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps))
return template.name, all_deps
except Exception:
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 60556a6f3..87438fb6d 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -29,12 +29,31 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) for how to download models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B
-Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
```
## Running the Distribution
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index 2b117120c..e8dfaaf3c 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -31,12 +31,31 @@ The following environment variables can be configured:
## Prerequisite: Downloading Models
-Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
+Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) for how to download models. Run `llama model list` to see the models available for download, and `llama model download` to download the checkpoints.
```
-$ ls ~/.llama/checkpoints
-Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B
-Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
+$ llama model list --downloaded
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
+┃ Model ┃ Size ┃ Modified Time ┃
+┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
+│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
+├─────────────────────────────────────────┼──────────┼─────────────────────┤
+│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
+└─────────────────────────────────────────┴──────────┴─────────────────────┘
```
## Running the Distribution
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 83c7b1a63..3c24a41ba 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate:
"inference": [inference_provider],
"vector_io": [vector_io_provider_sqlite],
},
- default_models=[inference_model],
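+ # the default run config now registers the embedding model alongside the inference model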
+ default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
),
"run-with-safety.yaml": RunConfigSettings(
diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml
index 0c82552c6..a2428688e 100644
--- a/llama_stack/templates/ollama/run.yaml
+++ b/llama_stack/templates/ollama/run.yaml
@@ -90,6 +90,12 @@ models:
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
model_type: llm
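+# default embedding model; ollama serves it as all-minilm:latest (384 dimensions)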
+- metadata:
+ embedding_dimension: 384
+ model_id: all-MiniLM-L6-v2
+ provider_id: ollama
+ provider_model_id: all-minilm:latest
+ model_type: embedding
shields: []
vector_dbs: []
datasets: []
diff --git a/pyproject.toml b/pyproject.toml
index dc5659f06..893aa3330 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,39 +123,16 @@ select = [
"I", # isort
]
ignore = [
- "E203",
- "E305",
- "E402",
- "E501", # line too long
- "E721",
- "E741",
- "F405",
- "F841",
- "C408", # ignored because we like the dict keyword argument syntax
- "E302",
- "W291",
- "E303",
- "N812", # ignored because import torch.nn.functional as F is PyTorch convention
- "N817", # ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
- "E731", # allow usage of assigning lambda expressions
+ # The following ignores are desired by the project maintainers.
+ "E402", # Module level import not at top of file
+ "E501", # Line too long
+ "F405", # Maybe undefined or defined from star import
+ "C408", # Ignored because we like the dict keyword argument syntax
+ "N812", # Ignored because import torch.nn.functional as F is PyTorch convention
+
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
- "C901",
- "C405",
- "C414",
- "N803",
- "N999",
- "C403",
- "C416",
- "B028",
- "C419",
- "C401",
- "B023",
- # shebang has extra meaning in fbcode lints, so I think it's not worth trying
- # to line this up with executable bit
- "EXE001",
- "N802", # random naming hints don't need
+ "C901", # Complexity of the function is too high
# these ignores are from flake8-bugbear; please fix!
- "B007",
"B008",
]
diff --git a/tests/client-sdk/__init__.py b/tests/client-sdk/__init__.py
index 756f351d8..ce038c94b 100644
--- a/tests/client-sdk/__init__.py
+++ b/tests/client-sdk/__init__.py
@@ -3,3 +3,4 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+# ruff: noqa: N999
diff --git a/tests/client-sdk/agents/__init__.py b/tests/client-sdk/agents/__init__.py
index 756f351d8..ce038c94b 100644
--- a/tests/client-sdk/agents/__init__.py
+++ b/tests/client-sdk/agents/__init__.py
@@ -3,3 +3,4 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+# ruff: noqa: N999
diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py
index 6e3dc0739..9690a8139 100644
--- a/tests/client-sdk/agents/test_agents.py
+++ b/tests/client-sdk/agents/test_agents.py
@@ -4,20 +4,15 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-import json
-from typing import Dict, List
from uuid import uuid4
import pytest
from llama_stack_client.lib.agents.agent import Agent
-from llama_stack_client.lib.agents.client_tool import ClientTool
+from llama_stack_client.lib.agents.client_tool import client_tool
from llama_stack_client.lib.agents.event_logger import EventLogger
-from llama_stack_client.types import ToolResponseMessage
from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument
from llama_stack_client.types.memory_insert_params import Document
-from llama_stack_client.types.shared.completion_message import CompletionMessage
from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig
-from llama_stack_client.types.tool_def_param import Parameter
from llama_stack.apis.agents.agents import (
AgentConfig as Server__AgentConfig,
@@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import (
)
-class TestClientTool(ClientTool):
- """Tool to give boiling point of a liquid
- Returns the correct value for polyjuice in Celcius and Fahrenheit
- and returns -1 for other liquids
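+# a decorated plain function replaces the ClientTool subclass; @client_tool derives
+# the tool schema from the function signature and docstring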
+@client_tool
+def get_boiling_point(liquid_name: str, celcius: bool = True) -> int:
"""
+ Returns the boiling point of a liquid in Celsius or Fahrenheit
- def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]:
- assert len(messages) == 1, "Expected single message"
-
- message = messages[0]
-
- tool_call = message.tool_calls[0]
-
- try:
- response = self.run_impl(**tool_call.arguments)
- response_str = json.dumps(response, ensure_ascii=False)
- except Exception as e:
- response_str = f"Error when running tool: {e}"
-
- message = ToolResponseMessage(
- role="tool",
- call_id=tool_call.call_id,
- tool_name=tool_call.tool_name,
- content=response_str,
- )
- return message
-
- def get_name(self) -> str:
- return "get_boiling_point"
-
- def get_description(self) -> str:
- return "Get the boiling point of imaginary liquids (eg. polyjuice)"
-
- def get_params_definition(self) -> Dict[str, Parameter]:
- return {
- "liquid_name": Parameter(
- name="liquid_name",
- parameter_type="string",
- description="The name of the liquid",
- required=True,
- ),
- "celcius": Parameter(
- name="celcius",
- parameter_type="boolean",
- description="Whether to return the boiling point in Celcius",
- required=False,
- ),
- }
-
- def run_impl(self, liquid_name: str, celcius: bool = True) -> int:
- if liquid_name.lower() == "polyjuice":
- if celcius:
- return -100
- else:
- return -212
+ :param liquid_name: The name of the liquid
+ :param celcius: Whether to return the boiling point in Celsius
+ :return: The boiling point of the liquid in Celsius or Fahrenheit
+ """
+ if liquid_name.lower() == "polyjuice":
+ if celcius:
+ return -100
else:
- return -1
+ return -212
+ else:
+ return -1
@pytest.fixture(scope="session")
@@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config):
def test_custom_tool(llama_stack_client, agent_config):
- client_tool = TestClientTool()
+ client_tool = get_boiling_point
agent_config = {
**agent_config,
"toolgroups": ["builtin::websearch"],
@@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config):
def test_tool_choice(llama_stack_client, agent_config):
def run_agent(tool_choice):
- client_tool = TestClientTool()
+ client_tool = get_boiling_point
test_agent_config = {
**agent_config,
@@ -362,7 +316,7 @@ def test_tool_choice(llama_stack_client, agent_config):
# TODO: fix this flaky test
def xtest_override_system_message_behavior(llama_stack_client, agent_config):
- client_tool = TestClientTool()
+ client_tool = get_boiling_point
agent_config = {
**agent_config,
"instructions": "You are a pirate",
@@ -458,7 +412,6 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name):
vector_db_id=vector_db_id,
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
- provider_id="faiss",
)
llama_stack_client.tool_runtime.rag_tool.insert(
documents=documents,
@@ -587,7 +540,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config):
def test_create_turn_response(llama_stack_client, agent_config):
- client_tool = TestClientTool()
+ client_tool = get_boiling_point
agent_config = {
**agent_config,
"input_shields": [],
diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py
index c0f4dca53..3ecf45086 100644
--- a/tests/client-sdk/conftest.py
+++ b/tests/client-sdk/conftest.py
@@ -117,7 +117,7 @@ def client_with_models(llama_stack_client, text_model_id, vision_model_id, embed
assert len(providers) > 0, "No inference providers found"
inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
- model_ids = set(m.identifier for m in client.models.list())
+ model_ids = {m.identifier for m in client.models.list()}
model_ids.update(m.provider_resource_id for m in client.models.list())
if text_model_id and text_model_id not in model_ids:
diff --git a/tests/client-sdk/inference/__init__.py b/tests/client-sdk/inference/__init__.py
index 756f351d8..ce038c94b 100644
--- a/tests/client-sdk/inference/__init__.py
+++ b/tests/client-sdk/inference/__init__.py
@@ -3,3 +3,4 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+# ruff: noqa: N999
diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py
index 46a901d62..075f927f7 100644
--- a/tests/client-sdk/inference/test_embedding.py
+++ b/tests/client-sdk/inference/test_embedding.py
@@ -75,6 +75,26 @@ DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
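+# capability sets used below to xfail combinations a provider or model does not support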
+SUPPORTED_PROVIDERS = {"remote::nvidia"}
+MODELS_SUPPORTING_MEDIA = set()  # {} would create an empty dict, not a set
+MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
+MODELS_REQUIRING_TASK_TYPE = {
+ "nvidia/llama-3.2-nv-embedqa-1b-v2",
+ "nvidia/nv-embedqa-e5-v5",
+ "nvidia/nv-embedqa-mistral-7b-v2",
+ "snowflake/arctic-embed-l",
+}
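+# currently identical: every model that accepts task_type also requires it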
+MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
+
+
+def default_task_type(model_id):
+ """
+ Some models require a task type parameter. This provides a default value for
+ testing those models.
+ """
+ if model_id in MODELS_REQUIRING_TASK_TYPE:
+ return {"task_type": "query"}
+ return {}
@pytest.mark.parametrize(
@@ -88,8 +108,12 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st
"list[text]",
],
)
-def test_embedding_text(llama_stack_client, embedding_model_id, contents):
- response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ response = llama_stack_client.inference.embeddings(
+ model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+ )
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
@@ -107,9 +131,14 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
"list[url,string,base64,text]",
],
)
-@pytest.mark.xfail(reason="Media is not supported")
-def test_embedding_image(llama_stack_client, embedding_model_id, contents):
- response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
+def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
+ pytest.xfail(f"{embedding_model_id} doesn't support media")
+ response = llama_stack_client.inference.embeddings(
+ model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
+ )
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
@@ -134,9 +163,16 @@ def test_embedding_image(llama_stack_client, embedding_model_id, contents):
"short",
],
)
-def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+def test_embedding_truncation(
+ llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
- model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+ model_id=embedding_model_id,
+ contents=contents,
+ text_truncation=text_truncation,
+ **default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
@@ -162,25 +198,43 @@ def test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc
"long-str",
],
)
-def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
- with pytest.raises(BadRequestError) as excinfo:
+def test_embedding_truncation_error(
+ llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
+):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ with pytest.raises(BadRequestError):
llama_stack_client.inference.embeddings(
- model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
+ model_id=embedding_model_id,
+ contents=[DUMMY_LONG_TEXT],
+ text_truncation=text_truncation,
+ **default_task_type(embedding_model_id),
)
-@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
-def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
- base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
+ pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
+ base_response = llama_stack_client.inference.embeddings(
+ model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
+ )
test_response = llama_stack_client.inference.embeddings(
- model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+ model_id=embedding_model_id,
+ contents=[DUMMY_STRING],
+ **default_task_type(embedding_model_id),
+ output_dimension=32,
)
assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
assert len(test_response.embeddings[0]) == 32
-@pytest.mark.xfail(reason="Only valid for model supporting task type")
-def test_embedding_task_type(llama_stack_client, embedding_model_id):
+def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
+ pytest.xfail(f"{embedding_model_id} doesn't support task_type")
query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
)
@@ -199,9 +253,14 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id):
"start",
],
)
-def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
- model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+ model_id=embedding_model_id,
+ contents=[DUMMY_STRING],
+ text_truncation=text_truncation,
+ **default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
@@ -219,8 +278,15 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_
"right",
],
)
-def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
- with pytest.raises(BadRequestError) as excinfo:
+def test_embedding_text_truncation_error(
+ llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
+):
+ if inference_provider_type not in SUPPORTED_PROVIDERS:
+ pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
+ with pytest.raises(BadRequestError):
llama_stack_client.inference.embeddings(
- model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+ model_id=embedding_model_id,
+ contents=[DUMMY_STRING],
+ text_truncation=text_truncation,
+ **default_task_type(embedding_model_id),
)
diff --git a/tests/client-sdk/inference/test_text_inference.py b/tests/client-sdk/inference/test_text_inference.py
index 7850d2d57..63813a1cc 100644
--- a/tests/client-sdk/inference/test_text_inference.py
+++ b/tests/client-sdk/inference/test_text_inference.py
@@ -139,7 +139,7 @@ def test_text_completion_log_probs_streaming(client_with_models, text_model_id,
"top_k": 1,
},
)
- streamed_content = [chunk for chunk in response]
+ streamed_content = list(response)
for chunk in streamed_content:
if chunk.delta: # if there's a token, we expect logprobs
assert chunk.logprobs, "Logprobs should not be empty"
@@ -405,7 +405,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request(
assert delta.tool_call.tool_name == "get_object_namespace_list"
if delta.type == "tool_call" and delta.parse_status == "failed":
# expect raw message that failed to parse in tool_call
- assert type(delta.tool_call) == str
+ assert isinstance(delta.tool_call, str)
assert len(delta.tool_call) > 0
else:
for tc in response.completion_message.tool_calls:
diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py
index b946b85ba..0151b3d20 100644
--- a/tests/client-sdk/report.py
+++ b/tests/client-sdk/report.py
@@ -42,29 +42,20 @@ def featured_models():
SUPPORTED_MODELS = {
- "ollama": set(
- [
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_8b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_70b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_1_405b_instruct.value,
- CoreModelId.llama3_2_1b_instruct.value,
- CoreModelId.llama3_2_1b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_3b_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_11b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_2_90b_vision_instruct.value,
- CoreModelId.llama3_3_70b_instruct.value,
- CoreModelId.llama_guard_3_8b.value,
- CoreModelId.llama_guard_3_1b.value,
- ]
- ),
- "tgi": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]),
- "vllm": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]),
+ "ollama": {
+ CoreModelId.llama3_1_8b_instruct.value,
+ CoreModelId.llama3_1_70b_instruct.value,
+ CoreModelId.llama3_1_405b_instruct.value,
+ CoreModelId.llama3_2_1b_instruct.value,
+ CoreModelId.llama3_2_3b_instruct.value,
+ CoreModelId.llama3_2_11b_vision_instruct.value,
+ CoreModelId.llama3_2_90b_vision_instruct.value,
+ CoreModelId.llama3_3_70b_instruct.value,
+ CoreModelId.llama_guard_3_8b.value,
+ CoreModelId.llama_guard_3_1b.value,
+ },
+ "tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
+ "vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo},
}
diff --git a/tests/client-sdk/safety/__init__.py b/tests/client-sdk/safety/__init__.py
index 756f351d8..ce038c94b 100644
--- a/tests/client-sdk/safety/__init__.py
+++ b/tests/client-sdk/safety/__init__.py
@@ -3,3 +3,4 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+# ruff: noqa: N999
diff --git a/tests/client-sdk/safety/test_safety.py b/tests/client-sdk/safety/test_safety.py
index 1417a9c06..79963e4d4 100644
--- a/tests/client-sdk/safety/test_safety.py
+++ b/tests/client-sdk/safety/test_safety.py
@@ -42,7 +42,7 @@ def code_scanner_shield_id(available_shields):
@pytest.fixture(scope="session")
def model_providers(llama_stack_client):
- return set([x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"])
+ return {x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"}
def test_unsafe_examples(llama_stack_client, llama_guard_text_shield_id):
diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py
index 40940f1ef..e330a10f5 100644
--- a/tests/client-sdk/tool_runtime/test_rag_tool.py
+++ b/tests/client-sdk/tool_runtime/test_rag_tool.py
@@ -24,7 +24,6 @@ def single_entry_vector_db_registry(llama_stack_client, empty_vector_db_registry
vector_db_id=vector_db_id,
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
- provider_id="faiss",
)
vector_dbs = [vector_db.identifier for vector_db in llama_stack_client.vector_dbs.list()]
return vector_dbs
@@ -121,7 +120,6 @@ def test_vector_db_insert_from_url_and_query(llama_stack_client, empty_vector_db
vector_db_id=vector_db_id,
embedding_model="all-MiniLM-L6-v2",
embedding_dimension=384,
- provider_id="faiss",
)
# list to check memory bank is successfully registered
diff --git a/tests/client-sdk/vector_io/__init__.py b/tests/client-sdk/vector_io/__init__.py
index 756f351d8..ce038c94b 100644
--- a/tests/client-sdk/vector_io/__init__.py
+++ b/tests/client-sdk/vector_io/__init__.py
@@ -3,3 +3,4 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+# ruff: noqa: N999