diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2a9f4b6f7..6b98cad90 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -52,6 +52,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -97,6 +109,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -128,6 +152,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -159,6 +195,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -183,6 +231,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -219,6 +279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -255,6 +327,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -286,6 +370,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -317,6 +413,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -362,6 +470,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -410,6 +530,18 @@ } } } + }, + "400": { + "$ref": 
"#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -438,6 +570,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -462,6 +606,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -492,6 +648,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -532,6 +700,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -570,6 +750,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -608,6 +800,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -648,6 +852,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -679,6 +895,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -719,6 +947,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -773,6 +1013,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -826,6 +1078,18 @@ 
} } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -863,6 +1127,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -884,6 +1160,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -921,6 +1209,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -942,6 +1242,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -979,6 +1291,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1016,6 +1340,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1046,6 +1382,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1084,6 +1432,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1124,6 +1484,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1154,6 +1526,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" } }, "tags": [ @@ -1175,6 +1559,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1205,6 +1601,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1242,6 +1650,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1279,6 +1699,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1309,6 +1741,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1337,6 +1781,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1373,6 +1829,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1422,6 +1890,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1443,6 +1923,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1473,6 +1965,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1487,6 +1991,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": 
{ + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1511,6 +2027,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1542,6 +2070,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1580,6 +2120,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1609,6 +2161,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1647,6 +2211,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1685,6 +2261,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1697,6 +2285,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1728,6 +2328,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1740,6 +2352,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1771,6 +2395,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1802,6 +2438,18 @@ } } } + }, + "400": { + 
"$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1821,6 +2469,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1852,6 +2512,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1873,6 +2545,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1894,6 +2578,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1932,6 +2628,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1944,6 +2652,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1975,6 +2695,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1994,6 +2726,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2025,6 +2769,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2037,6 +2793,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, 
"tags": [ @@ -2068,6 +2836,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2098,6 +2878,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2117,6 +2909,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2141,6 +2945,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2172,6 +2988,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2203,6 +3031,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2234,6 +3074,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2265,6 +3117,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2296,6 +3160,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2332,6 +3208,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2391,6 +3279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" } }, "tags": [ @@ -2431,6 +3331,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2455,6 +3367,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2486,6 +3410,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2517,6 +3453,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2548,6 +3496,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2579,6 +3539,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2610,6 +3582,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2623,6 +3607,35 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { + "Error": { + "type": "object", + "properties": { + "status": { + "type": "integer", + "description": "HTTP status code" + }, + "title": { + "type": "string", + "description": "Error title, a short summary of the error which is invariant for an error type" + }, + "detail": { + "type": "string", + "description": "Error detail, a longer human-readable description of the error" + }, + "instance": { + "type": "string", + "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error" + } + }, + "additionalProperties": false, + "required": [ + "status", + "title", + "detail" + ], + "title": "Error", + "description": "Error response from the API. Roughly follows RFC 7807." 
+ }, "AppendRowsRequest": { "type": "object", "properties": { @@ -8741,7 +9754,68 @@ "title": "VersionInfo" } }, - "responses": {} + "responses": { + "BadRequest400": { + "description": "The request was invalid or malformed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed" + } + } + } + }, + "TooManyRequests429": { + "description": "The client has sent too many requests in a given amount of time", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later." + } + } + } + }, + "InternalServerError500": { + "description": "The server encountered an unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. Our team has been notified." + } + } + } + }, + "DefaultError": { + "description": "An unexpected error occurred", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred" + } + } + } + } + } }, "security": [ { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a2329e47a..13f7edc4b 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -19,6 +19,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PaginatedRowsResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -47,6 +57,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -66,6 +86,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - BatchInference (Coming Soon) description: '' @@ -85,6 +115,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - BatchInference (Coming Soon) description: '' @@ -100,6 +140,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -124,6 
+174,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -149,6 +209,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/CompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -169,6 +239,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -188,6 +268,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentSessionCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -217,6 +307,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -246,6 +346,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBucketResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all buckets. 
@@ -263,6 +373,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileUploadResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -279,6 +399,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -297,6 +427,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Session' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -322,6 +462,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -345,6 +495,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -371,6 +531,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -401,6 +571,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EmbeddingsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -421,6 +601,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -445,6 +635,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentStepResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -478,6 +678,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Turn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + 
$ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -508,6 +718,16 @@ paths: oneOf: - $ref: '#/components/schemas/Benchmark' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -528,6 +748,16 @@ paths: oneOf: - $ref: '#/components/schemas/Dataset' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -541,6 +771,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -561,6 +801,16 @@ paths: oneOf: - $ref: '#/components/schemas/Model' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -574,6 +824,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -594,6 +854,16 @@ paths: oneOf: - $ref: '#/components/schemas/ScoringFn' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -614,6 +884,16 @@ paths: oneOf: - $ref: '#/components/schemas/Shield' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -632,6 +912,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Span' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -655,6 +945,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpanTreeResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -679,6 +979,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Tool' + '400': + $ref: 
'#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -697,6 +1007,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolGroup' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -710,6 +1030,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Unregister a tool group @@ -728,6 +1058,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Trace' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -748,6 +1088,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -768,6 +1118,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -786,6 +1146,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -801,6 +1171,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileUploadResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -822,6 +1202,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -852,6 +1242,16 @@ paths: oneOf: - $ref: '#/components/schemas/VectorDB' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + 
#/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -865,6 +1265,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -883,6 +1293,16 @@ paths: application/json: schema: $ref: '#/components/schemas/HealthInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -892,6 +1312,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -908,6 +1338,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -927,6 +1367,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolInvocationResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: Run a tool with the given arguments @@ -948,6 +1398,16 @@ paths: oneOf: - $ref: '#/components/schemas/JobStatus' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -966,6 +1426,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -989,6 +1459,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1012,6 +1492,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1020,6 +1510,16 @@ paths: responses: '200': description: OK + 
'400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1039,6 +1539,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListDatasetsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -1047,6 +1557,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -1066,6 +1586,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListFileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all files in a bucket. @@ -1085,6 +1615,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1097,6 +1637,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1116,6 +1666,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1129,6 +1689,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1142,6 +1712,16 @@ paths: application/jsonl: schema: $ref: '#/components/schemas/ToolDef' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: '' @@ -1165,6 +1745,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + 
$ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1173,6 +1763,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1192,6 +1792,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListShieldsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1204,6 +1814,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1223,6 +1843,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tool groups with optional provider @@ -1231,6 +1861,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Register a tool group @@ -1250,6 +1890,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tools with optional tool group @@ -1268,6 +1918,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListVectorDBsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1280,6 +1940,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VectorDB' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1295,6 +1965,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry 
description: '' @@ -1314,6 +1994,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1333,6 +2023,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RAGQueryResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -1353,6 +2053,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -1372,6 +2082,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpansResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1391,6 +2111,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryTracesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1415,6 +2145,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: >- @@ -1457,6 +2197,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1481,6 +2231,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RunShieldResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Safety description: '' @@ -1496,6 +2256,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1515,6 +2285,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreResponse' + '400': + $ref: 
'#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1534,6 +2314,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1553,6 +2343,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1572,6 +2372,16 @@ paths: application/json: schema: $ref: '#/components/schemas/SyntheticDataGenerationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - SyntheticDataGeneration (Coming Soon) description: '' @@ -1591,6 +2401,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VersionInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1599,6 +2419,34 @@ jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: schemas: + Error: + type: object + properties: + status: + type: integer + description: HTTP status code + title: + type: string + description: >- + Error title, a short summary of the error which is invariant for an error + type + detail: + type: string + description: >- + Error detail, a longer human-readable description of the error + instance: + type: string + description: >- + (Optional) A URL which can be used to retrieve more information about + the specific occurrence of the error + additionalProperties: false + required: + - status + - title + - detail + title: Error + description: >- + Error response from the API. Roughly follows RFC 7807. AppendRowsRequest: type: object properties: @@ -5626,7 +6474,51 @@ components: required: - version title: VersionInfo - responses: {} + responses: + BadRequest400: + description: The request was invalid or malformed + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 400 + title: Bad Request + detail: The request was invalid or malformed + TooManyRequests429: + description: >- + The client has sent too many requests in a given amount of time + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 429 + title: Too Many Requests + detail: >- + You have exceeded the rate limit. Please try again later. 
+ InternalServerError500: + description: >- + The server encountered an unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 500 + title: Internal Server Error + detail: >- + An unexpected error occurred. Our team has been notified. + DefaultError: + description: An unexpected error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 0 + title: Error + detail: An unexpected error occurred security: - Default: [] tags: diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 8ae6fed24..21436327e 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -1145,6 +1145,7 @@ } ], "source": [ + "# NBVAL_SKIP\n", "from pydantic import BaseModel\n", "\n", "\n", @@ -2885,7 +2886,6 @@ } ], "source": [ - "# NBVAL_SKIP\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", @@ -4326,7 +4326,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "toolchain", + "display_name": "master", "language": "python", "name": "python3" }, diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index dcbee7d2f..a2553f905 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -55,6 +55,7 @@ def main(output_dir: str): a set of endpoints and their corresponding interfaces that are tailored to best leverage Llama Models.""", ), + include_standard_error_responses=True, ), ) diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 4220cfc05..91f32e6c8 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -10,6 +10,7 @@ import typing from dataclasses import make_dataclass from typing import Any, Dict, Set, Union +from llama_stack.apis.datatypes import Error from llama_stack.strong_typing.core import JsonType from llama_stack.strong_typing.docstring import Docstring, parse_type from llama_stack.strong_typing.inspection import ( @@ -434,6 +435,75 @@ class Generator: ) self.schema_builder = SchemaBuilder(schema_generator) self.responses = {} + + # Create standard error responses + self._create_standard_error_responses() + + def _create_standard_error_responses(self) -> None: + """ + Creates standard error responses that can be reused across operations. + These will be added to the components.responses section of the OpenAPI document. + """ + # Get the Error schema + error_schema = self.schema_builder.classdef_to_ref(Error) + + # Create standard error responses + self.responses["BadRequest400"] = Response( + description="The request was invalid or malformed", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed", + } + ) + } + ) + + self.responses["TooManyRequests429"] = Response( + description="The client has sent too many requests in a given amount of time", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. 
Please try again later.", + } + ) + } + ) + + self.responses["InternalServerError500"] = Response( + description="The server encountered an unexpected error", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. Our team has been notified.", + } + ) + } + ) + + # Add a default error response for any unhandled error cases + self.responses["DefaultError"] = Response( + description="An unexpected error occurred", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred", + } + ) + } + ) def _build_type_tag(self, ref: str, schema: Schema) -> Tag: # Don't include schema definition in the tag description because for one, @@ -649,6 +719,18 @@ class Generator: responses.update(response_builder.build_response(response_options)) assert len(responses.keys()) > 0, f"No responses found for {op.name}" + + # Add standard error response references + if self.options.include_standard_error_responses: + if "400" not in responses: + responses["400"] = ResponseRef("BadRequest400") + if "429" not in responses: + responses["429"] = ResponseRef("TooManyRequests429") + if "500" not in responses: + responses["500"] = ResponseRef("InternalServerError500") + if "default" not in responses: + responses["default"] = ResponseRef("DefaultError") + if op.event_type is not None: builder = ContentBuilder(self.schema_builder) callbacks = { diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py index f80da453b..edc861ad5 100644 --- a/docs/openapi_generator/pyopenapi/options.py +++ b/docs/openapi_generator/pyopenapi/options.py @@ -35,6 +35,7 @@ class Options: :param error_wrapper: True if errors are encapsulated in an error object wrapper. :param property_description_fun: Custom transformation function to apply to class property documentation strings. :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types. + :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, 503) in all operations. """ server: Server @@ -52,6 +53,7 @@ class Options: error_wrapper: bool = False property_description_fun: Optional[Callable[[type, str, str], str]] = None captions: Optional[Dict[str, str]] = None + include_standard_error_responses: bool = True default_captions: ClassVar[Dict[str, str]] = { "Operations": "Operations", diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 9cb1a402f..20a835201 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co llama stack build > Enter a name for your Llama Stack (e.g. my-local-stack): my-stack -> Enter the image type you want your Llama Stack to be built as (container or conda): conda +> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda Llama Stack is composed of several APIs working together. Let's select the provider types (implementations) you want to use for these APIs. 
@@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i [--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}] config -start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. +Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. positional arguments: config Path to config file to use for the run diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b183757db..b8d1b1714 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md 
b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 9aeb7a88b..a49175e22 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ecef20d55..eb0dcf392 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -38,7 +38,7 @@ The API is **exactly identical** for both clients. :::{dropdown} Starting up the Llama Stack server The Llama Stack server can be configured flexibly so you can mix-and-match various providers for its individual API components -- beyond Inference, these include Vector IO, Agents, Telemetry, Evals, Post Training, etc. 
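Once the server is up, a quick client-side check confirms that the configured providers registered correctly. This is a sketch under assumptions: the port below is a placeholder for whatever you started the server with, and it presumes your installed `llama-stack-client` exposes `models.list()`.

```python
# Minimal smoke test against a running Llama Stack server.
# The base_url port is an assumption -- match it to your LLAMA_STACK_PORT.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Print every model the configured providers registered.
for model in client.models.list():
    print(model.identifier, model.provider_id)
```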
-To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image. +To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image. If you'd like to build your own image or customize the configurations, please check out [this guide](../references/index.md). Let's set up some environment variables that we will use in the rest of the guide. ```bash diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md index 6c791bcb7..ca470f8c2 100644 --- a/docs/source/references/llama_cli_reference/download_models.md +++ b/docs/source/references/llama_cli_reference/download_models.md @@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern **Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. + +## List the downloaded models + +To list the downloaded models, run the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md index a43666963..8a38fc3ae 100644 --- a/docs/source/references/llama_cli_reference/index.md +++ b/docs/source/references/llama_cli_reference/index.md @@ -154,6 +154,38 @@
llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. +## List the downloaded models + +To list the downloaded models, run the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` + ## Understand the models The `llama model` command helps you explore the model’s interface. diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 6df93052c..842a2b63d 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -5,6 +5,9 @@ # the root directory of this source tree. from enum import Enum +from typing import Optional + +from pydantic import BaseModel from llama_stack.schema_utils import json_schema_type @@ -33,3 +36,20 @@ class Api(Enum): # built-in API inspect = "inspect" + + +@json_schema_type +class Error(BaseModel): + """ + Error response from the API. Roughly follows RFC 7807.
+ + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: Optional[str] = None diff --git a/llama_stack/cli/model/prompt_format.py b/llama_stack/cli/model/prompt_format.py index ea9596ba5..516c67634 100644 --- a/llama_stack/cli/model/prompt_format.py +++ b/llama_stack/cli/model/prompt_format.py @@ -9,6 +9,7 @@ import textwrap from io import StringIO from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table from llama_stack.models.llama.datatypes import CoreModelId, ModelFamily, is_multimodal, model_family @@ -48,7 +49,26 @@ class ModelPromptFormat(Subcommand): supported_model_ids = [ m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2} ] - model_str = "\n".join([m.value for m in supported_model_ids]) + + model_list = [m.value for m in supported_model_ids] + model_str = "\n".join(model_list) + + if args.list: + headers = ["Model(s)"] + rows = [] + for m in model_list: + rows.append( + [ + m, + ] + ) + print_table( + rows, + headers, + separate_rows=True, + ) + return + try: model_id = CoreModelId(args.model_name) except ValueError: diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 89db368db..baa7d2e32 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -141,7 +141,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: completer=WordCompleter(available_providers), complete_while_typing=True, validator=Validator.from_callable( - lambda x: x in available_providers, + lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847 error_message="Invalid provider, use to see options", ), ) diff --git a/llama_stack/cli/tests/test_stack_config.py b/llama_stack/cli/tests/test_stack_config.py index 2b7b2b210..333f86e38 100644 --- a/llama_stack/cli/tests/test_stack_config.py +++ b/llama_stack/cli/tests/test_stack_config.py @@ -112,7 +112,7 @@ def test_parse_and_maybe_upgrade_config_old_format(old_config): inference_providers = result.providers["inference"] assert len(inference_providers) == 2 - assert set(x.provider_id for x in inference_providers) == { + assert {x.provider_id for x in inference_providers} == { "remote::ollama-00", "meta-reference-01", } diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 2b43b8128..3d808a4a4 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -15,7 +15,6 @@ from termcolor import cprint from llama_stack.distribution.datatypes import BuildConfig, Provider from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.utils.exec import run_command, run_with_pty from llama_stack.distribution.utils.image_types import ImageType from llama_stack.providers.datatypes import Api @@ -103,8 +102,6 @@ def build_image( template_or_config, image_name, container_base, - str(build_file_path), - str(BUILDS_BASE_DIR / ImageType.container.value), " ".join(normal_deps), ] elif build_config.image_type == ImageType.conda.value: diff --git 
a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh index 31b3e1b21..1eac2ee08 100755 --- a/llama_stack/distribution/build_conda_env.sh +++ b/llama_stack/distribution/build_conda_env.sh @@ -52,7 +52,7 @@ ensure_conda_env_python310() { local python_version="3.10" # Check if conda command is available - if ! command -v conda &>/dev/null; then + if ! is_command_available conda; then printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2 exit 1 fi diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 08941a538..68f8a0863 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. @@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} # mounting is not supported by docker buildx, so we use COPY instead USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} -if [ "$#" -lt 6 ]; then +if [ "$#" -lt 4 ]; then # This only works for templates - echo "Usage: $0 <template_or_config> <image_name> <container_base> <build_file_path> <host_build_dir> <pip_dependencies> [<special_pip_deps>]" >&2 + echo "Usage: $0 <template_or_config> <image_name> <container_base> <pip_dependencies> [<special_pip_deps>]" >&2 exit 1 fi set -euo pipefail template_or_config="$1" -image_name="$2" -container_base="$3" -build_file_path="$4" -host_build_dir="$5" -pip_dependencies="$6" -special_pip_deps="${7:-}" +shift +image_name="$1" +shift +container_base="$1" +shift +pip_dependencies="$1" +shift +special_pip_deps="${1:-}" # Define color codes RED='\033[0;31m' -GREEN='\033[0;32m' NC='\033[0m' # No Color CONTAINER_BINARY=${CONTAINER_BINARY:-docker} @@ -47,8 +48,10 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "$SCRIPT_DIR/common.sh" + add_to_container() { - local input output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then printf '%s\n' "$1" >>"$output_file" @@ -58,15 +61,21 @@ add_to_container() { fi } +# Check if container command is available +if ! is_command_available $CONTAINER_BINARY; then + printf "${RED}Error: ${CONTAINER_BINARY} command not found.
Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2 + exit 1 +fi + # Update and install UBI9 components if UBI9 base image is used if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then add_to_container << EOF FROM $container_base WORKDIR /app -RUN microdnf -y update && microdnf install -y iputils net-tools wget \ +RUN dnf -y update && dnf install -y iputils net-tools wget \ vim-minimal python3.11 python3.11-pip python3.11-wheel \ - python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all + python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv @@ -165,6 +174,11 @@ EOF fi fi +# remove uv after installation + add_to_container << EOF +RUN pip uninstall -y uv +EOF + # if template_or_config ends with .yaml, it is not a template and we should not use the --template flag if [[ "$template_or_config" != *.yaml ]]; then add_to_container << EOF @@ -185,26 +199,31 @@ RUN mkdir -p /.llama /.cache RUN chmod -R g+rw /app /.llama /.cache EOF -printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n" -cat $TEMP_DIR/Containerfile +printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" +cat "$TEMP_DIR"/Containerfile printf "\n" -mounts="" +# Start building the CLI arguments +CLI_ARGS=() + +# Read CONTAINER_OPTS and put it in an array +read -ra CLI_ARGS <<< "$CONTAINER_OPTS" + if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then if [ -n "$LLAMA_STACK_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") fi if [ -n "$LLAMA_MODELS_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount") fi if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") fi fi -if command -v selinuxenabled &>/dev/null && selinuxenabled; then +if is_command_available selinuxenabled && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" + CLI_ARGS+=("--security-opt" "label=disable") fi # Set version tag based on PyPI version @@ -225,11 +244,11 @@ image_tag="$image_name:$version_tag" # Detect platform architecture ARCH=$(uname -m) if [ -n "$BUILD_PLATFORM" ]; then - PLATFORM="--platform $BUILD_PLATFORM" + CLI_ARGS+=("--platform" "$BUILD_PLATFORM") elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - PLATFORM="--platform linux/arm64" + CLI_ARGS+=("--platform" "linux/arm64") elif [ "$ARCH" = "x86_64" ]; then - PLATFORM="--platform linux/amd64" + CLI_ARGS+=("--platform" "linux/amd64") else echo "Unsupported architecture: $ARCH" exit 1 @@ -238,8 +257,13 @@ fi echo "PWD: $(pwd)" echo "Containerfile: $TEMP_DIR/Containerfile" set -x -$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \ - -f "$TEMP_DIR/Containerfile" "." $mounts --progress=plain + +$CONTAINER_BINARY build \ + "${CLI_ARGS[@]}" \ + -t "$image_tag" \ + -f "$TEMP_DIR/Containerfile" \ + "."
\ + --progress=plain # clean up tmp/configs set +x diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 384e2c3c8..308081415 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import Api, ProviderSpec def stack_apis() -> List[Api]: - return [v for v in Api] + return list(Api) class AutoRoutedApiInfo(BaseModel): @@ -55,7 +55,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: def providable_apis() -> List[Api]: - routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis()) + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} return [api for api in Api if api not in routing_table_apis and api != Api.inspect] diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 0bc2e774c..69a096e97 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -115,8 +115,8 @@ async def resolve_impls( - flatmaps, sorts and resolves the providers in dependency order - for each API, produces either a (local, passthrough or router) implementation """ - routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis()) - router_apis = set(x.router_api for x in builtin_automatically_routed_apis()) + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} + router_apis = {x.router_api for x in builtin_automatically_routed_apis()} providers_with_specs = {} diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index c2434e517..80e9ecb7c 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -318,14 +318,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): ) model = await self.get_object_by_identifier("model", embedding_model) if model is None: - if embedding_model == "all-MiniLM-L6-v2": - raise ValueError( - "Embeddings are now served via Inference providers. " - "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. " - "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example." 
- ) - else: - raise ValueError(f"Model {embedding_model} not found") + raise ValueError(f"Model {embedding_model} not found") if model.model_type != ModelType.embedding: raise ValueError(f"Model {embedding_model} is not an embedding model") if "embedding_dimension" not in model.metadata: diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 202c9322f..4a916321d 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -134,7 +134,7 @@ def rag_chat_page(): dict( name="builtin::rag/knowledge_search", args={ - "vector_db_ids": [vector_db_id for vector_db_id in selected_vector_dbs], + "vector_db_ids": list(selected_vector_dbs), }, ) ], diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 82bf00e3c..aae6b35d8 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -46,7 +46,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: conda_env_info = json.loads(subprocess.check_output(["conda", "info", "--envs", "--json"]).decode()) envs = conda_env_info["envs"] for envpath in envs: - if envpath.endswith(env_name): + if os.path.basename(envpath) == env_name: return envpath return None diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py index 27b1a3502..74a3ae4f0 100644 --- a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +++ b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py @@ -226,10 +226,9 @@ class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase): class PythonListCustomToolGenerator(PromptTemplateGeneratorBase): # noqa: N801 DEFAULT_PROMPT = textwrap.dedent( """ + You are a helpful assistant. You have access to functions, but you should only use them if they are required. You are an expert in composing functions. You are given a question and a set of possible functions. - Based on the question, you will need to make one or more function/tool calls to achieve the purpose. - If none of the function can be used, point it out. If the given question lacks the parameters required by the function, - also point it out. You should only return the function call in tools call sections. + Based on the question, you may or may not need to make one function/tool call to achieve the purpose. 
{{ function_description }} """.strip("\n") diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 7995f4b31..97d2c12a0 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -611,8 +611,17 @@ class ChatAgent(ShieldRunnerMixin): if event.stop_reason is not None: stop_reason = event.stop_reason span.set_attribute("stop_reason", stop_reason) - span.set_attribute("input", [m.model_dump_json() for m in input_messages]) - span.set_attribute("output", f"content: {content} tool_calls: {tool_calls}") + span.set_attribute( + "input", + json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), + ) + output_attr = json.dumps( + { + "content": content, + "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], + } + ) + span.set_attribute("output", output_attr) n_iter += 1 await self.storage.set_num_infer_iters_in_turn(session_id, turn_id, n_iter) @@ -796,10 +805,10 @@ class ChatAgent(ShieldRunnerMixin): self, toolgroups_for_turn: Optional[List[AgentToolGroup]] = None ) -> Tuple[List[ToolDefinition], Dict[str, str]]: # Determine which tools to include - agent_config_toolgroups = set( - (toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup) + agent_config_toolgroups = { + toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup for toolgroup in self.agent_config.toolgroups - ) + } toolgroups_for_turn_set = ( agent_config_toolgroups if toolgroups_for_turn is None diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 18d408a31..a01f7f1f3 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import json from typing import Any, Dict, List, Optional from tqdm import tqdm @@ -86,7 +87,6 @@ class MetaReferenceEvalImpl( ) -> Job: task_def = self.benchmarks[benchmark_id] dataset_id = task_def.dataset_id - candidate = task_config.eval_candidate scoring_functions = task_def.scoring_functions dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.eval.value)) @@ -117,7 +117,7 @@ class MetaReferenceEvalImpl( generations = [] for i, x in tqdm(enumerate(input_rows)): assert ColumnName.chat_completion_input.value in x, "Invalid input row" - input_messages = eval(str(x[ColumnName.chat_completion_input.value])) + input_messages = json.loads(x[ColumnName.chat_completion_input.value]) input_messages = [UserMessage(**x) for x in input_messages] # NOTE: only single-turn agent generation is supported. 
Create a new session for each input row @@ -159,7 +159,7 @@ class MetaReferenceEvalImpl( generations = [] for x in tqdm(input_rows): if ColumnName.completion_input.value in x: - input_content = eval(str(x[ColumnName.completion_input.value])) + input_content = json.loads(x[ColumnName.completion_input.value]) response = await self.inference_api.completion( model=candidate.model, content=input_content, @@ -167,9 +167,8 @@ class MetaReferenceEvalImpl( ) generations.append({ColumnName.generated_answer.value: response.completion_message.content}) elif ColumnName.chat_completion_input.value in x: - chat_completion_input_str = str(x[ColumnName.chat_completion_input.value]) - input_messages = eval(chat_completion_input_str) - input_messages = [UserMessage(**x) for x in input_messages] + chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) + input_messages = [UserMessage(**x) for x in chat_completion_input_json] messages = [] if candidate.system_message: messages.append(candidate.system_message) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 763d9664d..516ac1ad8 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -208,7 +208,6 @@ class MetaReferenceInferenceImpl( logprobs = [] stop_reason = None - tokenizer = self.generator.formatter.tokenizer for token_result in self.generator.completion(request): tokens.append(token_result.token) if token_result.text == "<|eot_id|>": diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 658267f7f..91d0445ab 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -207,7 +207,7 @@ def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage return parse_message(maybe_json) except json.JSONDecodeError: return None - except ValueError as e: + except ValueError: return None @@ -352,7 +352,7 @@ class ModelParallelProcessGroup: if isinstance(obj, TaskResponse): yield obj.result - except GeneratorExit as e: + except GeneratorExit: self.request_socket.send(encode_msg(CancelSentinel())) while True: obj_json = self.request_socket.send() diff --git a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py index 014a26f09..cecb66dd3 100644 --- a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py +++ b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py @@ -7,6 +7,9 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. +# The file gets a special treatment for now? 
+# ruff: noqa: N803 + import unittest import torch diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py index 884977803..6b607f1c7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py @@ -10,16 +10,19 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import json from typing import Any, Mapping from llama_stack.providers.utils.common.data_schema_validator import ColumnName -def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]: +def llama_stack_instruct_to_torchtune_instruct( + sample: Mapping[str, Any], +) -> Mapping[str, Any]: assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, ( "Invalid input row" ) - input_messages = eval(str(sample[ColumnName.chat_completion_input.value])) + input_messages = json.loads(sample[ColumnName.chat_completion_input.value]) assert len(input_messages) == 1, "llama stack instruct dataset format only supports 1 user message" input_message = input_messages[0] @@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]: assert ColumnName.dialog.value in sample, "Invalid input row" role_map = {"user": "human", "assistant": "gpt"} - dialog = eval(str(sample[ColumnName.dialog.value])) + dialog = json.loads(sample[ColumnName.dialog.value]) assert len(dialog) > 1, "dialog must have at least 2 messages" roles = [] diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 41387474f..c88787f18 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -264,7 +264,7 @@ class LoraFinetuningSingleDevice: ) self.adapter_params = get_adapter_params(model) - self._is_dora = any(["magnitude" in k for k in self.adapter_params.keys()]) + self._is_dora = any("magnitude" in k for k in self.adapter_params.keys()) set_trainable_params(model, self.adapter_params) diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py index be0f023f3..a48b6b58b 100644 --- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py +++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -133,7 +133,7 @@ class BraintrustScoringImpl( async def shutdown(self) -> None: ... async def list_scoring_functions(self) -> List[ScoringFn]: - scoring_fn_defs_list = [x for x in self.supported_fn_defs_registry.values()] + scoring_fn_defs_list = list(self.supported_fn_defs_registry.values()) for f in scoring_fn_defs_list: assert f.identifier.startswith("braintrust"), ( "All braintrust scoring fn must have identifier prefixed with 'braintrust'!
" diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2ca7dd578..db9e176ee 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -198,7 +198,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): tool_config: Optional[ToolConfig] = None, ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: if tool_prompt_format: - warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring") + warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring", stacklevel=2) await check_health(self._config) # this raises errors diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py index 1849fda6d..0582cb816 100644 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py @@ -106,7 +106,7 @@ async def convert_chat_completion_request( payload.update(temperature=strategy.temperature) elif isinstance(strategy, TopKSamplingStrategy): if strategy.top_k != -1 and strategy.top_k < 1: - warnings.warn("top_k must be -1 or >= 1") + warnings.warn("top_k must be -1 or >= 1", stacklevel=2) nvext.update(top_k=strategy.top_k) elif isinstance(strategy, GreedySamplingStrategy): nvext.update(top_k=-1) @@ -168,7 +168,7 @@ def convert_completion_request( payload.update(top_p=request.sampling_params.top_p) elif request.sampling_params.strategy == "top_k": if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1: - warnings.warn("top_k must be -1 or >= 1") + warnings.warn("top_k must be -1 or >= 1", stacklevel=2) nvext.update(top_k=request.sampling_params.top_k) elif request.sampling_params.strategy == "greedy": nvext.update(top_k=-1) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 967a3e44d..8ec23cd90 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): tool_config: Optional[ToolConfig] = None, ) -> AsyncGenerator: model = await self.model_store.get_model(model_id) + # This is to be consistent with OpenAI API and support vLLM <= v0.6.3 + # References: + # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # * https://github.com/vllm-project/vllm/pull/10000 + if not tools and tool_config is not None: + tool_config.tool_choice = ToolChoice.none request = ChatCompletionRequest( model=model.provider_resource_id, messages=messages, diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py index ad80b8601..9ce3a972b 100644 --- a/llama_stack/providers/tests/eval/test_eval.py +++ b/llama_stack/providers/tests/eval/test_eval.py @@ -39,12 +39,11 @@ class Testeval: @pytest.mark.asyncio async def test_eval_evaluate_rows(self, eval_stack, inference_model, judge_model): - eval_impl, benchmarks_impl, datasetio_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasetio_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasetio], eval_stack[Api.datasets], - eval_stack[Api.models], ) await register_dataset(datasets_impl, for_generation=True, 
dataset_id="test_dataset_for_eval") @@ -92,11 +91,10 @@ class Testeval: @pytest.mark.asyncio async def test_eval_run_eval(self, eval_stack, inference_model, judge_model): - eval_impl, benchmarks_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasets], - eval_stack[Api.models], ) await register_dataset(datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval") @@ -131,11 +129,10 @@ class Testeval: @pytest.mark.asyncio async def test_eval_run_benchmark_eval(self, eval_stack, inference_model): - eval_impl, benchmarks_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasets], - eval_stack[Api.models], ) response = await datasets_impl.list_datasets() diff --git a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py index febd13045..c9a7f69a8 100644 --- a/llama_stack/providers/tests/report.py +++ b/llama_stack/providers/tests/report.py @@ -18,54 +18,48 @@ from llama_stack.models.llama.sku_list import all_registered_models INFERENCE_APIS = ["chat_completion"] FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"] SUPPORTED_MODELS = { - "ollama": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - ] - ), - "fireworks": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - ] - ), - "together": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - ] - ), + "ollama": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + 
CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_1b.value, + }, + "fireworks": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_11b_vision.value, + }, + "together": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_11b_vision.value, + }, } diff --git a/llama_stack/providers/tests/scoring/test_scoring.py b/llama_stack/providers/tests/scoring/test_scoring.py index e98fd8627..d80b105f4 100644 --- a/llama_stack/providers/tests/scoring/test_scoring.py +++ b/llama_stack/providers/tests/scoring/test_scoring.py @@ -45,13 +45,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) scoring_fns_list = await scoring_functions_impl.list_scoring_functions() provider_id = scoring_fns_list[0].provider_id @@ -102,13 +100,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) await register_dataset(datasets_impl, for_rag=True) response = await datasets_impl.list_datasets() @@ -163,13 +159,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) await register_dataset(datasets_impl, for_rag=True) rows = await datasetio_impl.get_rows_paginated( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index eaf5ad2e1..98c2bfd2e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -6,7 +6,7 @@ import json import logging import warnings -from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union +from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union from openai import AsyncStream from openai.types.chat import ( @@ -605,7 +605,7 @@ def convert_tool_call( tool_name=tool_call.function.name, arguments=json.loads(tool_call.function.arguments), ) - except Exception as e: + except Exception: return UnparseableToolCall( call_id=tool_call.id or "", tool_name=tool_call.function.name or "", @@ -841,14 +841,13 @@ async def convert_openai_chat_completion_stream( Convert a stream of OpenAI chat completion chunks into a stream of ChatCompletionResponseStreamChunk. 
""" - - # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ... - def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]: - yield ChatCompletionResponseEventType.start - while True: - yield ChatCompletionResponseEventType.progress - - event_type = _event_type_generator() + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta=TextDelta(text=""), + ) + ) + event_type = ChatCompletionResponseEventType.progress stop_reason = None toolcall_buffer = {} @@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream( if choice.delta.content: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=TextDelta(text=choice.delta.content), logprobs=_convert_openai_logprobs(logprobs), ) @@ -877,7 +876,9 @@ async def convert_openai_chat_completion_stream( # it is possible to have parallel tool calls in stream, but # ChatCompletionResponseEvent only supports one per stream if len(choice.delta.tool_calls) > 1: - warnings.warn("multiple tool calls found in a single delta, using the first, ignoring the rest") + warnings.warn( + "multiple tool calls found in a single delta, using the first, ignoring the rest", stacklevel=2 + ) if not enable_incremental_tool_calls: yield ChatCompletionResponseStreamChunk( @@ -909,7 +910,7 @@ async def convert_openai_chat_completion_stream( toolcall_buffer["content"] += delta yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=ToolCallDelta( tool_call=delta, parse_status=ToolCallParseStatus.in_progress, @@ -920,7 +921,7 @@ async def convert_openai_chat_completion_stream( else: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=TextDelta(text=choice.delta.content or ""), logprobs=_convert_openai_logprobs(logprobs), ) @@ -931,7 +932,7 @@ async def convert_openai_chat_completion_stream( toolcall_buffer["content"] += delta yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=ToolCallDelta( tool_call=delta, parse_status=ToolCallParseStatus.in_progress, diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/llama_stack/providers/utils/kvstore/redis/redis.py index f5254198b..a390ea866 100644 --- a/llama_stack/providers/utils/kvstore/redis/redis.py +++ b/llama_stack/providers/utils/kvstore/redis/redis.py @@ -36,7 +36,7 @@ class RedisKVStoreImpl(KVStore): value = await self.redis.get(key) if value is None: return None - ttl = await self.redis.ttl(key) + await self.redis.ttl(key) return value async def delete(self, key: str) -> None: diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/llama_stack/providers/utils/scoring/aggregation_utils.py index 35c4ee180..6686e4ade 100644 --- a/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -32,7 +32,7 @@ def aggregate_categorical_count( scoring_results: List[ScoringResultRow], ) -> Dict[str, Any]: scores = [str(r["score"]) for r in scoring_results] - unique_scores = sorted(list(set(scores))) + unique_scores = sorted(set(scores)) return {"categorical_count": {s: scores.count(s) for s in unique_scores}} diff --git 
a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/llama_stack/providers/utils/scoring/base_scoring_fn.py index a741e5baa..d28c57cc1 100644 --- a/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -66,7 +66,7 @@ class RegisteredBaseScoringFn(BaseScoringFn): return self.__class__.__name__ def get_supported_scoring_fn_defs(self) -> List[ScoringFn]: - return [x for x in self.supported_fn_defs_registry.values()] + return list(self.supported_fn_defs_registry.values()) def register_scoring_fn_def(self, scoring_fn: ScoringFn) -> None: if scoring_fn.identifier in self.supported_fn_defs_registry: diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py index 924274c42..525ade74d 100644 --- a/llama_stack/providers/utils/telemetry/trace_protocol.py +++ b/llama_stack/providers/utils/telemetry/trace_protocol.py @@ -6,6 +6,7 @@ import asyncio import inspect +import json from functools import wraps from typing import Any, AsyncGenerator, Callable, Type, TypeVar @@ -17,6 +18,10 @@ T = TypeVar("T") def serialize_value(value: Any) -> Primitive: + return str(_prepare_for_json(value)) + + +def _prepare_for_json(value: Any) -> Any: """Serialize a single value into JSON-compatible format.""" if value is None: return "" @@ -25,9 +30,17 @@ elif hasattr(value, "_name_"): return value._name_ elif isinstance(value, BaseModel): - return value.model_dump_json() + return json.loads(value.model_dump_json()) + elif isinstance(value, (list, tuple, set)): + return [_prepare_for_json(item) for item in value] + elif isinstance(value, dict): + return {str(k): _prepare_for_json(v) for k, v in value.items()} else: - return str(value) + try: + json.dumps(value) + return value + except Exception: + return str(value) def trace_protocol(cls: Type[T]) -> Type[T]: @@ -104,7 +117,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]: result = method(self, *args, **kwargs) span.set_attribute("output", serialize_value(result)) return result - except Exception as e: + except Exception as e: + span.set_attribute("error", str(e)) raise if is_async_gen: diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py index 76c7283eb..92c82983e 100644 --- a/llama_stack/scripts/distro_codegen.py +++ b/llama_stack/scripts/distro_codegen.py @@ -99,7 +99,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[ template = template_func() normal_deps, special_deps = get_provider_dependencies(template.providers) # Combine all dependencies in order: normal deps, special deps, server deps - all_deps = sorted(list(set(normal_deps + SERVER_DEPENDENCIES))) + sorted(list(set(special_deps))) + all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps)) return template.name, all_deps except Exception: diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index 60556a6f3..87438fb6d 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -29,12 +29,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding.
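The stream-conversion hunk earlier in this patch replaces the `next()`-driven `start -> progress -> progress -> ...` generator with an eagerly emitted `start` chunk followed by a constant `progress` event type. A minimal, self-contained sketch of that control flow, using simplified stand-in types rather than the real `ChatCompletionResponseStreamChunk` models:

```python
import asyncio
from dataclasses import dataclass
from typing import AsyncGenerator, AsyncIterator


@dataclass
class Chunk:
    event_type: str  # stand-in for ChatCompletionResponseEventType
    text: str        # stand-in for TextDelta


async def convert_stream(deltas: AsyncIterator[str]) -> AsyncGenerator[Chunk, None]:
    # Emit the start event once, up front, instead of pulling it from an
    # infinite generator when the first content delta arrives.
    yield Chunk(event_type="start", text="")
    event_type = "progress"  # every subsequent event is a progress event
    async for delta in deltas:
        yield Chunk(event_type=event_type, text=delta)


async def main() -> None:
    async def deltas() -> AsyncIterator[str]:
        for piece in ["Hel", "lo"]:
            yield piece

    async for chunk in convert_stream(deltas()):
        print(chunk)


asyncio.run(main())
```

One consequence of the change: the converted stream now always begins with an empty `start` chunk, even when the upstream produces no content deltas at all.

Similarly, the `serialize_value` rewrite in `trace_protocol.py` above now recurses through containers and Pydantic models before falling back to `str`. A runnable sketch of the `_prepare_for_json` logic in isolation (assuming `pydantic` v2 is available, as in this repo; the enum `_name_` branch is omitted):

```python
import json
from typing import Any

from pydantic import BaseModel


def _prepare_for_json(value: Any) -> Any:
    """Recursively convert a value into JSON-compatible primitives."""
    if value is None:
        return ""
    if isinstance(value, BaseModel):
        # Round-trip through model_dump_json() so nested models stay
        # structured instead of being embedded as one JSON string.
        return json.loads(value.model_dump_json())
    if isinstance(value, (list, tuple, set)):
        return [_prepare_for_json(item) for item in value]
    if isinstance(value, dict):
        return {str(k): _prepare_for_json(v) for k, v in value.items()}
    try:
        json.dumps(value)  # already JSON-serializable? keep as-is
        return value
    except Exception:
        return str(value)


class Point(BaseModel):
    x: int
    y: int


print(_prepare_for_json({"points": [Point(x=1, y=2)], 3: "c"}))
# {'points': [{'x': 1, 'y': 2}], '3': 'c'}
```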
See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md index 2b117120c..e8dfaaf3c 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md @@ -31,12 +31,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) to download the models.
Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. ``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 83c7b1a63..3c24a41ba 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate: "inference": [inference_provider], "vector_io": [vector_io_provider_sqlite], }, - default_models=[inference_model], + default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, ), "run-with-safety.yaml": RunConfigSettings( diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 0c82552c6..a2428688e 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -90,6 +90,12 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding shields: [] vector_dbs: [] datasets: [] diff --git a/pyproject.toml b/pyproject.toml index dc5659f06..893aa3330 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,39 +123,16 @@ select = [ "I", # isort ] ignore = [ - "E203", - "E305", - "E402", - "E501", # line too long - "E721", - "E741", - "F405", - "F841", - "C408", # ignored because we like the dict keyword argument syntax - "E302", - "W291", - "E303", - "N812", # ignored because import torch.nn.functional as F is PyTorch convention - "N817", # ignored because importing using acronyms is convention (DistributedDataParallel as DDP) - "E731", # allow usage of assigning lambda expressions + # The following ignores are desired by 
the project maintainers. + "E402", # Module level import not at top of file + "E501", # Line too long + "F405", # Maybe undefined or defined from star import + "C408", # Ignored because we like the dict keyword argument syntax + "N812", # Ignored because import torch.nn.functional as F is PyTorch convention + # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. - "C901", - "C405", - "C414", - "N803", - "N999", - "C403", - "C416", - "B028", - "C419", - "C401", - "B023", - # shebang has extra meaning in fbcode lints, so I think it's not worth trying - # to line this up with executable bit - "EXE001", - "N802", # random naming hints don't need + "C901", # Complexity of the function is too high # these ignores are from flake8-bugbear; please fix! - "B007", "B008", ] diff --git a/tests/client-sdk/__init__.py b/tests/client-sdk/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/__init__.py +++ b/tests/client-sdk/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 diff --git a/tests/client-sdk/agents/__init__.py b/tests/client-sdk/agents/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/agents/__init__.py +++ b/tests/client-sdk/agents/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 6e3dc0739..9690a8139 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -4,20 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
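The `# ruff: noqa: N999` pragmas added to each `__init__.py` above are file-level suppressions: `N999` flags module paths that are not valid Python identifiers, and the `client-sdk` directory name contains a hyphen, so the rule can never be satisfied there. A small sketch of how the pragma differs from a line-level `noqa`, together with the dict-literal style that the retained `C408` ignore permits (the file path here is hypothetical):

```python
# tests/my-suite/__init__.py -- hypothetical path whose hyphen trips N999.
# A file-level pragma: disables only rule N999 for this whole file,
# unlike a trailing "# noqa: N999" comment, which acts on a single line.
# ruff: noqa: N999

# C408 stays ignored project-wide because the maintainers prefer keeping
# dict keyword-argument syntax available alongside literals:
assert dict(model="llama", stream=True) == {"model": "llama", "stream": True}
```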
-import json -from typing import Dict, List from uuid import uuid4 import pytest from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.client_tool import ClientTool +from llama_stack_client.lib.agents.client_tool import client_tool from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import ToolResponseMessage from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument from llama_stack_client.types.memory_insert_params import Document -from llama_stack_client.types.shared.completion_message import CompletionMessage from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig -from llama_stack_client.types.tool_def_param import Parameter from llama_stack.apis.agents.agents import ( AgentConfig as Server__AgentConfig, @@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import ( ) -class TestClientTool(ClientTool): - """Tool to give boiling point of a liquid - Returns the correct value for polyjuice in Celcius and Fahrenheit - and returns -1 for other liquids +@client_tool +def get_boiling_point(liquid_name: str, celcius: bool = True) -> int: """ + Returns the boiling point of a liquid in Celsius or Fahrenheit - def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: - assert len(messages) == 1, "Expected single message" - - message = messages[0] - - tool_call = message.tool_calls[0] - - try: - response = self.run_impl(**tool_call.arguments) - response_str = json.dumps(response, ensure_ascii=False) - except Exception as e: - response_str = f"Error when running tool: {e}" - - message = ToolResponseMessage( - role="tool", - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=response_str, - ) - return message - - def get_name(self) -> str: - return "get_boiling_point" - - def get_description(self) -> str: - return "Get the boiling point of imaginary liquids (eg.
polyjuice)" - - def get_params_definition(self) -> Dict[str, Parameter]: - return { - "liquid_name": Parameter( - name="liquid_name", - parameter_type="string", - description="The name of the liquid", - required=True, - ), - "celcius": Parameter( - name="celcius", - parameter_type="boolean", - description="Whether to return the boiling point in Celcius", - required=False, - ), - } - - def run_impl(self, liquid_name: str, celcius: bool = True) -> int: - if liquid_name.lower() == "polyjuice": - if celcius: - return -100 - else: - return -212 + :param liquid_name: The name of the liquid + :param celcius: Whether to return the boiling point in Celcius + :return: The boiling point of the liquid in Celcius or Fahrenheit + """ + if liquid_name.lower() == "polyjuice": + if celcius: + return -100 else: - return -1 + return -212 + else: + return -1 @pytest.fixture(scope="session") @@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): def test_custom_tool(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "toolgroups": ["builtin::websearch"], @@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config): def test_tool_choice(llama_stack_client, agent_config): def run_agent(tool_choice): - client_tool = TestClientTool() + client_tool = get_boiling_point test_agent_config = { **agent_config, @@ -362,7 +316,7 @@ def test_tool_choice(llama_stack_client, agent_config): # TODO: fix this flaky test def xtest_override_system_message_behavior(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "instructions": "You are a pirate", @@ -458,7 +412,6 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) llama_stack_client.tool_runtime.rag_tool.insert( documents=documents, @@ -587,7 +540,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): def test_create_turn_response(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "input_shields": [], diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py index c0f4dca53..3ecf45086 100644 --- a/tests/client-sdk/conftest.py +++ b/tests/client-sdk/conftest.py @@ -117,7 +117,7 @@ def client_with_models(llama_stack_client, text_model_id, vision_model_id, embed assert len(providers) > 0, "No inference providers found" inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"] - model_ids = set(m.identifier for m in client.models.list()) + model_ids = {m.identifier for m in client.models.list()} model_ids.update(m.provider_resource_id for m in client.models.list()) if text_model_id and text_model_id not in model_ids: diff --git a/tests/client-sdk/inference/__init__.py b/tests/client-sdk/inference/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/inference/__init__.py +++ b/tests/client-sdk/inference/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+# ruff: noqa: N999 diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 46a901d62..075f927f7 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -75,6 +75,26 @@ DUMMY_IMAGE_URL = ImageContentItem( image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") +SUPPORTED_PROVIDERS = {"remote::nvidia"} +MODELS_SUPPORTING_MEDIA = set() +MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"} +MODELS_REQUIRING_TASK_TYPE = { + "nvidia/llama-3.2-nv-embedqa-1b-v2", + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", +} +MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE + + +def default_task_type(model_id): + """ + Some models require a task type parameter. This provides a default value for + testing those models. + """ + if model_id in MODELS_REQUIRING_TASK_TYPE: + return {"task_type": "query"} + return {} @@ -88,8 +108,12 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st "list[text]", ], ) -def test_embedding_text(llama_stack_client, embedding_model_id, contents): - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) +def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -107,9 +131,14 @@ "list[url,string,base64,text]", ], ) -@pytest.mark.xfail(reason="Media is not supported") -def test_embedding_image(llama_stack_client, embedding_model_id, contents): - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) +def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + if embedding_model_id not in MODELS_SUPPORTING_MEDIA: + pytest.xfail(f"{embedding_model_id} doesn't support media") + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -134,9 +163,16 @@ "short", ], ) -def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents): +def test_embedding_truncation( + llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: +
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=contents, text_truncation=text_truncation + model_id=embedding_model_id, + contents=contents, + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -162,25 +198,43 @@ def test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc "long-str", ], ) -def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents): - with pytest.raises(BadRequestError) as excinfo: +def test_embedding_truncation_error( + llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_LONG_TEXT], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) -@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") -def test_embedding_output_dimension(llama_stack_client, embedding_model_id): - base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) +def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION: + pytest.xfail(f"{embedding_model_id} doesn't support output_dimension") + base_response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id) + ) test_response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 + model_id=embedding_model_id, + contents=[DUMMY_STRING], + **default_task_type(embedding_model_id), + output_dimension=32, ) assert len(base_response.embeddings[0]) != len(test_response.embeddings[0]) assert len(test_response.embeddings[0]) == 32 -@pytest.mark.xfail(reason="Only valid for model supporting task type") -def test_embedding_task_type(llama_stack_client, embedding_model_id): +def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE: + pytest.xfail(f"{embedding_model_id} doesn't support task_type") query_embedding = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" ) @@ -199,9 +253,14 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id): "start", ], ) -def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation): +def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support 
embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -219,8 +278,15 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_ "right", ], ) -def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation): - with pytest.raises(BadRequestError) as excinfo: +def test_embedding_text_truncation_error( + llama_stack_client, embedding_model_id, text_truncation, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) diff --git a/tests/client-sdk/inference/test_text_inference.py b/tests/client-sdk/inference/test_text_inference.py index 7850d2d57..63813a1cc 100644 --- a/tests/client-sdk/inference/test_text_inference.py +++ b/tests/client-sdk/inference/test_text_inference.py @@ -139,7 +139,7 @@ def test_text_completion_log_probs_streaming(client_with_models, text_model_id, "top_k": 1, }, ) - streamed_content = [chunk for chunk in response] + streamed_content = list(response) for chunk in streamed_content: if chunk.delta: # if there's a token, we expect logprobs assert chunk.logprobs, "Logprobs should not be empty" @@ -405,7 +405,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request( assert delta.tool_call.tool_name == "get_object_namespace_list" if delta.type == "tool_call" and delta.parse_status == "failed": # expect raw message that failed to parse in tool_call - assert type(delta.tool_call) == str + assert isinstance(delta.tool_call, str) assert len(delta.tool_call) > 0 else: for tc in response.completion_message.tool_calls: diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py index b946b85ba..0151b3d20 100644 --- a/tests/client-sdk/report.py +++ b/tests/client-sdk/report.py @@ -42,29 +42,27 @@ def featured_models(): SUPPORTED_MODELS = { - "ollama": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - ] - ), - "tgi": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]), - "vllm": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]), + "ollama": { + CoreModelId.llama3_1_8b_instruct.value, + 
CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_1b.value, + }, + "tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, + "vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, } diff --git a/tests/client-sdk/safety/__init__.py b/tests/client-sdk/safety/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/safety/__init__.py +++ b/tests/client-sdk/safety/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 diff --git a/tests/client-sdk/safety/test_safety.py b/tests/client-sdk/safety/test_safety.py index 1417a9c06..79963e4d4 100644 --- a/tests/client-sdk/safety/test_safety.py +++ b/tests/client-sdk/safety/test_safety.py @@ -42,7 +42,7 @@ def code_scanner_shield_id(available_shields): @pytest.fixture(scope="session") def model_providers(llama_stack_client): - return set([x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"]) + return {x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"} def test_unsafe_examples(llama_stack_client, llama_guard_text_shield_id): diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py index 40940f1ef..e330a10f5 100644 --- a/tests/client-sdk/tool_runtime/test_rag_tool.py +++ b/tests/client-sdk/tool_runtime/test_rag_tool.py @@ -24,7 +24,6 @@ def single_entry_vector_db_registry(llama_stack_client, empty_vector_db_registry vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) vector_dbs = [vector_db.identifier for vector_db in llama_stack_client.vector_dbs.list()] return vector_dbs @@ -121,7 +120,6 @@ def test_vector_db_insert_from_url_and_query(llama_stack_client, empty_vector_db vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) # list to check memory bank is successfully registered diff --git a/tests/client-sdk/vector_io/__init__.py b/tests/client-sdk/vector_io/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/vector_io/__init__.py +++ b/tests/client-sdk/vector_io/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999
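Two patterns recur throughout this patch and are worth spelling out. First, the `set([...])` and `set(<generator>)` calls rewritten above become set literals and set comprehensions (ruff C405 and C401); construction behavior is identical, and duplicate members, such as the repeated `CoreModelId` entries in the ollama sets, collapse either way. A minimal sketch with plain strings standing in for the `CoreModelId.<model>.value` strings:

```python
# Old style: build a throwaway list, then convert it to a set.
old = set(["llama3_1_8b_instruct", "llama3_1_8b_instruct", "llama_guard_3_1b"])

# New style: a set literal -- no intermediate list, duplicates still collapse.
new = {"llama3_1_8b_instruct", "llama3_1_8b_instruct", "llama_guard_3_1b"}

assert old == new == {"llama3_1_8b_instruct", "llama_guard_3_1b"}

# set(<generator>) likewise becomes a set comprehension:
repos = [("tgi", True), ("vllm", True), ("local", False)]
hosted = {name for name, remote in repos if remote}
assert hosted == {"tgi", "vllm"}
```

Second, the embedding tests thread provider-specific defaults through `**default_task_type(...)`: splatting an empty dict adds nothing to the call, so only models in `MODELS_REQUIRING_TASK_TYPE` pick up the extra keyword. A self-contained illustration, with a stand-in for the real `inference.embeddings` client method:

```python
MODELS_REQUIRING_TASK_TYPE = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}


def default_task_type(model_id: str) -> dict:
    # Populated dict splats to task_type="query"; an empty dict is a no-op.
    if model_id in MODELS_REQUIRING_TASK_TYPE:
        return {"task_type": "query"}
    return {}


def embeddings(model_id: str, contents: list, **kwargs) -> dict:
    # Stand-in for llama_stack_client.inference.embeddings().
    return {"model_id": model_id, "n_contents": len(contents), **kwargs}


print(embeddings("all-MiniLM-L6-v2", ["hi"], **default_task_type("all-MiniLM-L6-v2")))
# {'model_id': 'all-MiniLM-L6-v2', 'n_contents': 1}

print(embeddings(
    "nvidia/llama-3.2-nv-embedqa-1b-v2",
    ["hi"],
    **default_task_type("nvidia/llama-3.2-nv-embedqa-1b-v2"),
))
# {'model_id': 'nvidia/llama-3.2-nv-embedqa-1b-v2', 'n_contents': 1, 'task_type': 'query'}
```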